diff options
author | Aiden Grossman <aidengrossman@google.com> | 2025-09-12 01:05:48 +0000 |
---|---|---|
committer | Aiden Grossman <aidengrossman@google.com> | 2025-09-12 01:05:48 +0000 |
commit | 88a52e1fc6d3e153132f0e0a86431762adf8c0c4 (patch) | |
tree | bae8c1b720736edc54705c325c5bfb95b459eda2 /clang | |
parent | 1873dd7e8bb03319500a9f4b51e9e498a8fb70de (diff) | |
parent | 2740e4b73682eb7a6869c333991a608304938952 (diff) | |
download | llvm-users/boomanaiden154/main.clang-invoke-shell-script-with-bash.zip llvm-users/boomanaiden154/main.clang-invoke-shell-script-with-bash.tar.gz llvm-users/boomanaiden154/main.clang-invoke-shell-script-with-bash.tar.bz2 |
[spr] changes introduced through rebase (branch: users/boomanaiden154/main.clang-invoke-shell-script-with-bash)
Created using spr 1.3.6
[skip ci]
Diffstat (limited to 'clang')
248 files changed, 6919 insertions, 7741 deletions
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 741946e..cb8ea5e 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -366,16 +366,25 @@ implementation. +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| threadset clause | :part:`in progress` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
-| Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | |
+| Recording of task graphs | :part:`in progress` | :part:`in progress` | clang: jtb20, flang: kparzysz |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| Parallel inductions | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| init_complete for scan directive | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
-| Loop transformation constructs | :none:`unclaimed` | :none:`unclaimed` | |
+| loop interchange transformation | :good:`done` | :none:`unclaimed` | Clang (interchange): https://github.com/llvm/llvm-project/pull/93022 |
+| | | | Clang (permutation): https://github.com/llvm/llvm-project/pull/92030 |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop reverse transformation | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/92916 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| loop stripe transformation | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/119891 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop fusion transformation | :part:`in progress` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/139293 |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop index set splitting transformation | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop transformation apply clause | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| workdistribute construct | | :none:`in progress` | @skc7, @mjklemm |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| task_iteration | :none:`unclaimed` | :none:`unclaimed` | |
@@ -446,7 +455,7 @@ implementation. | Optionally omit array length expression | :good:`done` | :none:`unclaimed` | (Parse) https://github.com/llvm/llvm-project/pull/148048, |
| | | | (Sema) https://github.com/llvm/llvm-project/pull/152786 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
-| Canonical loop sequences | :none:`unclaimed` | :part:`In Progress` | |
+| Canonical loop sequences | :part:`in progress` | :part:`in progress` | Clang: https://github.com/llvm/llvm-project/pull/139293 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| Clarifications to Fortran map semantics | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
@@ -507,6 +516,10 @@ implementation. | | | | https://github.com/llvm/llvm-project/pull/152830 |
| | | | https://github.com/llvm/llvm-project/pull/152831) |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop flatten transformation | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop grid/tile modifiers for sizes clause | :none:`unclaimed` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
OpenMP Extensions
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 56c4697..e1e497c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -279,6 +279,9 @@ Improvements to Clang's diagnostics - The :doc:`ThreadSafetyAnalysis` attributes ``ACQUIRED_BEFORE(...)`` and ``ACQUIRED_AFTER(...)`` have been moved to the stable feature set and no longer require ``-Wthread-safety-beta`` to be used. +- The :doc:`ThreadSafetyAnalysis` gains basic alias-analysis of capability + pointers under ``-Wthread-safety-beta`` (still experimental), which reduces + both false positives but also false negatives through more precise analysis. Improvements to Clang's time-trace ---------------------------------- @@ -308,6 +311,8 @@ Bug Fixes in This Version - Builtin elementwise operators now accept vector arguments that have different qualifiers on their elements. For example, vector of 4 ``const float`` values and vector of 4 ``float`` values. (#GH155405) +- Fixed a failed assertion with a negative limit parameter value inside of + ``__has_embed``. (#GH157842) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -350,6 +355,11 @@ Bug Fixes to C++ Support authentication enabled. (#GH152601) - Fix the check for narrowing int-to-float conversions, so that they are detected in cases where converting the float back to an integer is undefined behaviour (#GH157067). +- Fix a crash when applying binary or ternary operators to two same function types with different spellings, + where at least one of the function parameters has an attribute which affects + the function type. +- Fix an assertion failure when a ``constexpr`` variable is only referenced through + ``__builtin_addressof``, and related issues with builtin arguments. 
(#GH154034) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/Attr.h b/clang/include/clang/AST/Attr.h index 994f236..fe388b9 100644 --- a/clang/include/clang/AST/Attr.h +++ b/clang/include/clang/AST/Attr.h @@ -232,6 +232,40 @@ public: } }; +class HLSLSemanticAttr : public HLSLAnnotationAttr { + unsigned SemanticIndex = 0; + LLVM_PREFERRED_TYPE(bool) + unsigned SemanticIndexable : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned SemanticExplicitIndex : 1; + +protected: + HLSLSemanticAttr(ASTContext &Context, const AttributeCommonInfo &CommonInfo, + attr::Kind AK, bool IsLateParsed, + bool InheritEvenIfAlreadyPresent, bool SemanticIndexable) + : HLSLAnnotationAttr(Context, CommonInfo, AK, IsLateParsed, + InheritEvenIfAlreadyPresent) { + this->SemanticIndexable = SemanticIndexable; + this->SemanticExplicitIndex = false; + } + +public: + bool isSemanticIndexable() const { return SemanticIndexable; } + + void setSemanticIndex(unsigned SemanticIndex) { + this->SemanticIndex = SemanticIndex; + this->SemanticExplicitIndex = true; + } + + unsigned getSemanticIndex() const { return SemanticIndex; } + + // Implement isa/cast/dyncast/etc. + static bool classof(const Attr *A) { + return A->getKind() >= attr::FirstHLSLSemanticAttr && + A->getKind() <= attr::LastHLSLSemanticAttr; + } +}; + /// A parameter attribute which changes the argument-passing ABI rule /// for the parameter. class ParameterABIAttr : public InheritableParamAttr { diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 23a0996..7554089 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -1038,7 +1038,7 @@ public: // PointerLikeTypeTraits is specialized so it can be used with a forward-decl of // Expr. Verify that we got it right. 
static_assert(llvm::PointerLikeTypeTraits<Expr *>::NumLowBitsAvailable <= - llvm::detail::ConstantLog2<alignof(Expr)>::value, + llvm::ConstantLog2<alignof(Expr)>(), "PointerLikeTypeTraits<Expr*> assumes too much alignment."); using ConstantExprKind = Expr::ConstantExprKind; diff --git a/clang/include/clang/Analysis/Analyses/LiveVariables.h b/clang/include/clang/Analysis/Analyses/LiveVariables.h index 8a3dd0c..90a0f0f 100644 --- a/clang/include/clang/Analysis/Analyses/LiveVariables.h +++ b/clang/include/clang/Analysis/Analyses/LiveVariables.h @@ -34,7 +34,7 @@ public: llvm::ImmutableSet<const VarDecl *> liveDecls; llvm::ImmutableSet<const BindingDecl *> liveBindings; - bool equals(const LivenessValues &V) const; + bool operator==(const LivenessValues &V) const; LivenessValues() : liveExprs(nullptr), liveDecls(nullptr), liveBindings(nullptr) {} @@ -58,13 +58,8 @@ public: /// A callback invoked right before invoking the /// liveness transfer function on the given statement. - virtual void observeStmt(const Stmt *S, - const CFGBlock *currentBlock, - const LivenessValues& V) {} - - /// Called when the live variables analysis registers - /// that a variable is killed. 
- virtual void observerKill(const DeclRefExpr *DR) {} + virtual void observeStmt(const Stmt *S, const CFGBlock *currentBlock, + const LivenessValues &V) {} }; ~LiveVariables() override; diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h index 6c97905..d20f172 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h @@ -35,6 +35,7 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" +#include <functional> #include <sstream> #include <string> #include <utility> @@ -386,6 +387,11 @@ public: SelfVar->setKind(til::Variable::VK_SFun); } + // Create placeholder for this: we don't know the VarDecl on construction yet. + til::LiteralPtr *createThisPlaceholder() { + return new (Arena) til::LiteralPtr(nullptr); + } + // Translate a clang expression in an attribute to a til::SExpr. // Constructs the context from D, DeclExp, and SelfDecl. CapabilityExpr translateAttrExpr(const Expr *AttrExp, const NamedDecl *D, @@ -394,8 +400,8 @@ public: CapabilityExpr translateAttrExpr(const Expr *AttrExp, CallingContext *Ctx); - // Translate a variable reference. - til::LiteralPtr *createVariable(const VarDecl *VD); + // Translate a VarDecl to its canonical TIL expression. + til::SExpr *translateVariable(const VarDecl *VD, CallingContext *Ctx); // Translate a clang statement or expression to a TIL expression. // Also performs substitution of variables; Ctx provides the context. 
@@ -412,6 +418,10 @@ public: const til::SCFG *getCFG() const { return Scfg; } til::SCFG *getCFG() { return Scfg; } + void setLookupLocalVarExpr(std::function<const Expr *(const NamedDecl *)> F) { + LookupLocalVarExpr = std::move(F); + } + private: // We implement the CFGVisitor API friend class CFGWalker; @@ -445,6 +455,7 @@ private: const AbstractConditionalOperator *C, CallingContext *Ctx); til::SExpr *translateDeclStmt(const DeclStmt *S, CallingContext *Ctx); + til::SExpr *translateStmtExpr(const StmtExpr *SE, CallingContext *Ctx); // Map from statements in the clang CFG to SExprs in the til::SCFG. using StatementMap = llvm::DenseMap<const Stmt *, til::SExpr *>; @@ -531,6 +542,11 @@ private: std::vector<til::Phi *> IncompleteArgs; til::BasicBlock *CurrentBB = nullptr; BlockInfo *CurrentBlockInfo = nullptr; + + // Recursion guard. + llvm::DenseSet<const ValueDecl *> VarsBeingTranslated; + // Context-dependent lookup of currently valid definitions of local variables. + std::function<const Expr *(const NamedDecl *)> LookupLocalVarExpr; }; #ifndef NDEBUG diff --git a/clang/include/clang/Analysis/FlowSensitive/RecordOps.h b/clang/include/clang/Analysis/FlowSensitive/RecordOps.h index 8fad45f..91204c0 100644 --- a/clang/include/clang/Analysis/FlowSensitive/RecordOps.h +++ b/clang/include/clang/Analysis/FlowSensitive/RecordOps.h @@ -13,6 +13,7 @@ #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_RECORDOPS_H #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_RECORDOPS_H +#include "clang/AST/Type.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/StorageLocation.h" @@ -36,8 +37,11 @@ namespace dataflow { /// - The type of `Src` must be derived from `Dest`, or /// - The type of `Dest` must be derived from `Src` (in this case, any fields /// that are only present in `Dest` are not overwritten). 
+/// - The types of `Dest` and `Src` are both derived from a non-null +/// `TypeToCopy` (in this case, only fields present in `TypeToCopy` are +/// overwritten). void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst, - Environment &Env); + Environment &Env, QualType TypeToCopy = QualType()); /// Returns whether the records `Loc1` and `Loc2` are equal. /// diff --git a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h index 8fcc6a4..534b9a0 100644 --- a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h +++ b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h @@ -17,6 +17,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Type.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" #include <cassert> @@ -152,6 +153,11 @@ public: return {SyntheticFields.begin(), SyntheticFields.end()}; } + /// Add a synthetic field, if none by that name is already present. + void addSyntheticField(llvm::StringRef Name, StorageLocation &Loc) { + SyntheticFields.insert({Name, &Loc}); + } + /// Changes the child storage location for a field `D` of reference type. /// All other fields cannot change their storage location and always retain /// the storage location passed to the `RecordStorageLocation` constructor. @@ -164,6 +170,11 @@ public: Children[&D] = Loc; } + /// Add a child storage location for a field `D`, if not already present. 
+ void addChild(const ValueDecl &D, StorageLocation *Loc) { + Children.insert({&D, Loc}); + } + llvm::iterator_range<FieldToLoc::const_iterator> children() const { return {Children.begin(), Children.end()}; } diff --git a/clang/include/clang/Basic/ABI.h b/clang/include/clang/Basic/ABI.h index 231bad7..8279529 100644 --- a/clang/include/clang/Basic/ABI.h +++ b/clang/include/clang/Basic/ABI.h @@ -27,14 +27,16 @@ enum CXXCtorType { Ctor_Comdat, ///< The COMDAT used for ctors Ctor_CopyingClosure, ///< Copying closure variant of a ctor Ctor_DefaultClosure, ///< Default closure variant of a ctor + Ctor_Unified, ///< GCC-style unified ctor }; /// C++ destructor types. enum CXXDtorType { - Dtor_Deleting, ///< Deleting dtor - Dtor_Complete, ///< Complete object dtor - Dtor_Base, ///< Base object dtor - Dtor_Comdat ///< The COMDAT used for dtors + Dtor_Deleting, ///< Deleting dtor + Dtor_Complete, ///< Complete object dtor + Dtor_Base, ///< Base object dtor + Dtor_Comdat, ///< The COMDAT used for dtors + Dtor_Unified, ///< GCC-style unified dtor }; } // end namespace clang diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index e672d85..cdaed4a 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -779,6 +779,16 @@ class DeclOrStmtAttr : InheritableAttr; /// An attribute class for HLSL Annotations. class HLSLAnnotationAttr : InheritableAttr; +class HLSLSemanticAttr<bit Indexable> : HLSLAnnotationAttr { + bit SemanticIndexable = Indexable; + int SemanticIndex = 0; + bit SemanticExplicitIndex = 0; + + let Spellings = []; + let Subjects = SubjectList<[ParmVar, Field, Function]>; + let LangOpts = [HLSL]; +} + /// A target-specific attribute. This class is meant to be used as a mixin /// with InheritableAttr or Attr depending on the attribute's needs. 
class TargetSpecificAttr<TargetSpec target> { @@ -954,7 +964,7 @@ def PatchableFunctionEntry : InheritableAttr, TargetSpecificAttr<TargetArch< ["aarch64", "aarch64_be", "loongarch32", "loongarch64", "riscv32", - "riscv64", "x86", "x86_64", "ppc", "ppc64"]>> { + "riscv64", "x86", "x86_64", "ppc", "ppc64", "ppc64le"]>> { let Spellings = [GCC<"patchable_function_entry">]; let Subjects = SubjectList<[Function, ObjCMethod]>; let Args = [UnsignedArgument<"Count">, DefaultIntArgument<"Offset", 0>, @@ -1185,6 +1195,7 @@ static llvm::Triple::EnvironmentType getEnvironmentType(llvm::StringRef Environm .Case("callable", llvm::Triple::Callable) .Case("mesh", llvm::Triple::Mesh) .Case("amplification", llvm::Triple::Amplification) + .Case("rootsignature", llvm::Triple::RootSignature) .Case("library", llvm::Triple::Library) .Default(llvm::Triple::UnknownEnvironment); } @@ -4889,27 +4900,6 @@ def HLSLNumThreads: InheritableAttr { let Documentation = [NumThreadsDocs]; } -def HLSLSV_GroupThreadID: HLSLAnnotationAttr { - let Spellings = [HLSLAnnotation<"sv_groupthreadid">]; - let Subjects = SubjectList<[ParmVar, Field]>; - let LangOpts = [HLSL]; - let Documentation = [HLSLSV_GroupThreadIDDocs]; -} - -def HLSLSV_GroupID: HLSLAnnotationAttr { - let Spellings = [HLSLAnnotation<"sv_groupid">]; - let Subjects = SubjectList<[ParmVar, Field]>; - let LangOpts = [HLSL]; - let Documentation = [HLSLSV_GroupIDDocs]; -} - -def HLSLSV_GroupIndex: HLSLAnnotationAttr { - let Spellings = [HLSLAnnotation<"sv_groupindex">]; - let Subjects = SubjectList<[ParmVar, GlobalVar]>; - let LangOpts = [HLSL]; - let Documentation = [HLSLSV_GroupIndexDocs]; -} - def HLSLVkBinding : InheritableAttr { let Spellings = [CXX11<"vk", "binding">]; let Subjects = SubjectList<[HLSLBufferObj, ExternalGlobalVar], ErrorDiag>; @@ -4968,13 +4958,35 @@ def HLSLResourceBinding: InheritableAttr { }]; } -def HLSLSV_Position : HLSLAnnotationAttr { - let Spellings = [HLSLAnnotation<"sv_position">]; - let Subjects = 
SubjectList<[ParmVar, Field]>; +def HLSLUnparsedSemantic : HLSLAnnotationAttr { + let Spellings = []; + let Args = [DefaultIntArgument<"Index", 0>, + DefaultBoolArgument<"ExplicitIndex", 0>]; + let Subjects = SubjectList<[ParmVar, Field, Function]>; let LangOpts = [HLSL]; + let Documentation = [InternalOnly]; +} + +def HLSLSV_Position : HLSLSemanticAttr</* Indexable= */ 1> { let Documentation = [HLSLSV_PositionDocs]; } +def HLSLSV_GroupThreadID : HLSLSemanticAttr</* Indexable= */ 0> { + let Documentation = [HLSLSV_GroupThreadIDDocs]; +} + +def HLSLSV_GroupID : HLSLSemanticAttr</* Indexable= */ 0> { + let Documentation = [HLSLSV_GroupIDDocs]; +} + +def HLSLSV_GroupIndex : HLSLSemanticAttr</* Indexable= */ 0> { + let Documentation = [HLSLSV_GroupIndexDocs]; +} + +def HLSLSV_DispatchThreadID : HLSLSemanticAttr</* Indexable= */ 0> { + let Documentation = [HLSLSV_DispatchThreadIDDocs]; +} + def HLSLPackOffset: HLSLAnnotationAttr { let Spellings = [HLSLAnnotation<"packoffset">]; let LangOpts = [HLSL]; @@ -4987,13 +4999,6 @@ def HLSLPackOffset: HLSLAnnotationAttr { }]; } -def HLSLSV_DispatchThreadID: HLSLAnnotationAttr { - let Spellings = [HLSLAnnotation<"sv_dispatchthreadid">]; - let Subjects = SubjectList<[ParmVar, Field]>; - let LangOpts = [HLSL]; - let Documentation = [HLSLSV_DispatchThreadIDDocs]; -} - def HLSLShader : InheritableAttr { let Spellings = [Microsoft<"shader">]; let Subjects = SubjectList<[HLSLEntry]>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index dd22e55..ee212a9 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6719,7 +6719,7 @@ if omitted.``Section`` defaults to the ``-fpatchable-function-entry`` section n set, or to ``__patchable_function_entries`` otherwise. This attribute is only supported on -aarch64/aarch64-be/loongarch32/loongarch64/riscv32/riscv64/i386/x86-64/ppc/ppc64 targets. 
+aarch64/aarch64-be/loongarch32/loongarch64/riscv32/riscv64/i386/x86-64/ppc/ppc64/ppc64le targets. For ppc/ppc64 targets, AIX is still not supported. }]; } @@ -8367,6 +8367,23 @@ flag. }]; } +def DocHLSLSemantics : DocumentationCategory<"HLSL Semantics"> { + let Content = [{ +A semantic is a string attached to a shader input or output that conveys +information about the intended use of a parameter. Semantics are required on +all variables passed between shader stages. The syntax for adding a semantic +to a shader variable is shown here (Variable Syntax (DirectX HLSL)). + +In general, data passed between pipeline stages is completely generic and is +not uniquely interpreted by the system; arbitrary semantics are allowed which +have no special meaning. Parameters (in Direct3D 10 and later) which contain +these special semantics are referred to as System-Value Semantics. + +More information is available here: +https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics + }]; +} + def WaveSizeDocs : Documentation { let Category = DocCatFunction; let Content = [{ @@ -8604,7 +8621,7 @@ randomized. } def HLSLSV_GroupThreadIDDocs : Documentation { - let Category = DocCatFunction; + let Category = DocHLSLSemantics; let Content = [{ The ``SV_GroupThreadID`` semantic, when applied to an input parameter, specifies which individual thread within a thread group is executing in. This attribute is @@ -8615,7 +8632,7 @@ The full documentation is available here: https://docs.microsoft.com/en-us/windo } def HLSLSV_GroupIDDocs : Documentation { - let Category = DocCatFunction; + let Category = DocHLSLSemantics; let Content = [{ The ``SV_GroupID`` semantic, when applied to an input parameter, specifies which thread group a shader is executing in. This attribute is only supported in compute shaders. 
@@ -8625,7 +8642,7 @@ The full documentation is available here: https://docs.microsoft.com/en-us/windo } def HLSLSV_GroupIndexDocs : Documentation { - let Category = DocCatFunction; + let Category = DocHLSLSemantics; let Content = [{ The ``SV_GroupIndex`` semantic, when applied to an input parameter, specifies a data binding to map the group index to the specified parameter. This attribute @@ -8682,7 +8699,7 @@ The full documentation is available here: https://learn.microsoft.com/en-us/wind } def HLSLSV_DispatchThreadIDDocs : Documentation { - let Category = DocCatFunction; + let Category = DocHLSLSemantics; let Content = [{ The ``SV_DispatchThreadID`` semantic, when applied to an input parameter, specifies a data binding to map the global thread offset within the Dispatch @@ -8697,7 +8714,7 @@ The full documentation is available here: https://docs.microsoft.com/en-us/windo } def HLSLSV_PositionDocs : Documentation { - let Category = DocCatFunction; + let Category = DocHLSLSemantics; let Content = [{ The ``SV_Position`` semantic, when applied to an input parameter in a pixel shader, contains the location of the pixel center (x, y) in screen space. 
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index e5a1422..fda16e4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -34,6 +34,20 @@ BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc") BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc") BUILTIN(__builtin_amdgcn_workgroup_id_z, "Ui", "nc") +TARGET_BUILTIN(__builtin_amdgcn_cluster_id_x, "Ui", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cluster_id_y, "Ui", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cluster_id_z, "Ui", "nc", "gfx1250-insts") + +TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_id_x, "Ui", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_id_y, "Ui", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_id_z, "Ui", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_flat_id, "Ui", "nc", "gfx1250-insts") + +TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_max_id_x, "Ui", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_max_id_y, "Ui", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_max_id_z, "Ui", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_max_flat_id, "Ui", "nc", "gfx1250-insts") + BUILTIN(__builtin_amdgcn_workitem_id_x, "Ui", "nc") BUILTIN(__builtin_amdgcn_workitem_id_y, "Ui", "nc") BUILTIN(__builtin_amdgcn_workitem_id_z, "Ui", "nc") @@ -365,6 +379,31 @@ BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n") BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n") //===----------------------------------------------------------------------===// + +// Wave Reduction builtins. 
+ +//===----------------------------------------------------------------------===// + +BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc") + +//===----------------------------------------------------------------------===// // R600-NI only builtins. 
//===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b4ff550..1a8645f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -93,8 +93,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } let Features = "sse2" in { - def pavgb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; - def pavgw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; @@ -106,6 +104,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">; + def pavgw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">; def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">; } @@ -575,8 +575,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; - def pavgb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; - def pavgw256 : X86Builtin<"_Vector<16, short>(_Vector<16, 
short>, _Vector<16, short>)">; def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; @@ -618,6 +616,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i } let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; + def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; + def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">; def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; @@ -1056,27 +1057,27 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> } let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpdpbusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">; + def vpdpbusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">; } let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">; + def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">; } let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">; + def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, 
_Vector<64, char>)">; } let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpdpbusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">; + def vpdpbusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">; } let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpdpbusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">; + def vpdpbusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">; } let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpdpbusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">; + def vpdpbusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">; } let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { @@ -1307,8 +1308,6 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512 def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">; def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">; def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">; - def pavgb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; - def pavgw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; } @@ -1350,6 +1349,8 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512 } let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def pavgb512 : X86Builtin<"_Vector<64, unsigned 
char>(_Vector<64, unsigned char>, _Vector<64, unsigned char>)">; + def pavgw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; def pmulhuw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } @@ -1772,75 +1773,30 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512 def vpermi2varhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpshldd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { + def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; def vpshldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def 
vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { + def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; def vpshldw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; + def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; def vpshrdd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, 
_Vector<2, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { + def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; def vpshrdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { + def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; def vpshrdw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; } -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; + def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; + def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; + def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; + def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">; def vpshrdw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Vector<32, short>, _Constant int)">; } @@ -2002,6 +1958,10 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto def psrav16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; def psrlv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; + def psllv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; + def psrav8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; + def psrlv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; + def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; @@ -2025,15 +1985,18 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; - def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; +} + +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def psllv16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; } -let Features = "avx512bw,avx512vl", Attributes 
= [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def psllv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; } @@ -2044,15 +2007,15 @@ let Features = "avx512f", def psllqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def psrlv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def psrlv16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; } @@ -2063,23 +2026,23 @@ let Features = "avx512f", def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def psrav32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def psrav16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; } -let Features = 
"avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def psrav8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def psravq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; } @@ -2431,13 +2394,10 @@ let Features = "avx512vl", let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; - def psllv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; - def psrav8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; - def psrlv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, 
int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; diff --git a/clang/include/clang/Basic/DebugOptions.def b/clang/include/clang/Basic/DebugOptions.def index c6e736e..a768b12 100644 --- a/clang/include/clang/Basic/DebugOptions.def +++ b/clang/include/clang/Basic/DebugOptions.def @@ -125,6 +125,12 @@ DEBUGOPT(DebugNameTable, 2, 0, Compatible) /// Whether to use DWARF base address specifiers in .debug_ranges. DEBUGOPT(DebugRangesBaseAddress, 1, 0, Compatible) +/// Whether to add linkage names to constructor/destructor declarations. +/// This is an escape hatch for cases where attaching the additional linkage +/// names would increase debug-info size (particularly the .debug_str section) +/// too much. +DEBUGOPT(DebugStructorDeclLinkageNames, 1, 0, Benign) + /// Whether to embed source in DWARF debug line section. 
DEBUGOPT(EmbedSource, 1, 0, Compatible) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 561637f..ceb6909 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -485,6 +485,9 @@ def warn_drv_overriding_option : Warning< def warn_drv_overriding_deployment_version : Warning<"overriding deployment version from '%0' to '%1'">, InGroup<DiagGroup<"overriding-deployment-version">>; +def warn_drv_overriding_complex_range : Warning< + "'%1' sets complex range to \"%3\" overriding the setting of \"%2\" that was implied by '%0'">, + InGroup<DiagGroup<"overriding-complex-range">>; def warn_drv_treating_input_as_cxx : Warning< "treating '%0' input as '%1' when in C++ mode, this behavior is deprecated">, InGroup<Deprecated>; diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 1544755..2fd2ae4 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -400,6 +400,10 @@ def warn_hlsl_langstd_minimal : "recommend using %1 instead">, InGroup<HLSLDXCCompat>; +def err_hlsl_semantic_missing : Error<"semantic annotations must be present " + "for all input and outputs of an entry " + "function or patch constant function">; + // ClangIR frontend errors def err_cir_to_cir_transform_failed : Error< "CIR-to-CIR transformation failed">, DefaultFatal; diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index bc7a6e2..968a7c5 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1860,9 +1860,8 @@ def note_max_tokens_total_override : Note<"total token limit set here">; def err_expected_semantic_identifier : Error< "expected HLSL Semantic identifier">; -def 
err_invalid_declaration_in_hlsl_buffer : Error< - "invalid declaration inside %select{tbuffer|cbuffer}0">; -def err_unknown_hlsl_semantic : Error<"unknown HLSL semantic %0">; +def err_invalid_declaration_in_hlsl_buffer + : Error<"invalid declaration inside %select{tbuffer|cbuffer}0">; def err_hlsl_separate_attr_arg_and_number : Error<"wrong argument format for hlsl attribute, use %0 instead">; def ext_hlsl_access_specifiers : ExtWarn< "access specifiers are a clang HLSL extension">, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 711efbe..b0e669c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13121,6 +13121,11 @@ def err_hlsl_duplicate_parameter_modifier : Error<"duplicate parameter modifier def err_hlsl_missing_semantic_annotation : Error< "semantic annotations must be present for all parameters of an entry " "function or patch constant function">; +def err_hlsl_unknown_semantic : Error<"unknown HLSL semantic %0">; +def err_hlsl_semantic_output_not_supported + : Error<"semantic %0 does not support output">; +def err_hlsl_semantic_indexing_not_supported + : Error<"semantic %0 does not allow indexing">; def err_hlsl_init_priority_unsupported : Error< "initializer priorities are not supported in HLSL">; @@ -13147,6 +13152,8 @@ def err_hlsl_attribute_needs_intangible_type: Error<"attribute %0 can be used on def err_hlsl_incorrect_num_initializers: Error< "too %select{few|many}0 initializers in list for type %1 " "(expected %2 but found %3)">; +def err_hlsl_rootsignature_entry: Error< + "rootsignature specified as target environment but entry, %0, was not defined">; def err_hlsl_operator_unsupported : Error< "the '%select{&|*|->}0' operator is unsupported in HLSL">; @@ -13195,6 +13202,9 @@ def err_hlsl_vk_literal_must_contain_constant: Error<"the argument to vk::Litera def err_hlsl_invalid_rootsig_value : Error<"value must be 
in the range [%0, %1]">; def err_hlsl_invalid_rootsig_flag : Error< "invalid flags for version 1.%0">; +def err_hlsl_invalid_mixed_resources: Error< "sampler and non-sampler resource mixed in descriptor table">; +def err_hlsl_appending_onto_unbound: Error<"offset appends to unbounded descriptor range">; +def err_hlsl_offset_overflow: Error<"descriptor range offset overflows [%0, %1]">; def subst_hlsl_format_ranges: TextSubstitution< "%select{t|u|b|s}0[%1;%select{%3]|unbounded)}2">; diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 8dc4ca2..a3f167e 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -223,6 +223,22 @@ public: mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType, mlir::Type type, llvm::StringRef name, + mlir::IntegerAttr alignment, + mlir::Value dynAllocSize) { + return cir::AllocaOp::create(*this, loc, addrType, type, name, alignment, + dynAllocSize); + } + + mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType, + mlir::Type type, llvm::StringRef name, + clang::CharUnits alignment, + mlir::Value dynAllocSize) { + mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment); + return createAlloca(loc, addrType, type, name, alignmentAttr, dynAllocSize); + } + + mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType, + mlir::Type type, llvm::StringRef name, mlir::IntegerAttr alignment) { return cir::AllocaOp::create(*this, loc, addrType, type, name, alignment); } diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index bbbd10b..b3c435c 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -341,6 +341,11 @@ def CIR_AllocaOp : CIR_Op<"alloca", [ The presence of the `const` attribute indicates that the local variable is declared 
with C/C++ `const` keyword. + The `dynAllocSize` specifies the size to dynamically allocate on the stack + and ignores the allocation size based on the original type. This is useful + when handling VLAs or the `alloca` builtin and is omitted when declaring + regular local variables. + The result type is a pointer to the input's type. Example: @@ -356,6 +361,7 @@ def CIR_AllocaOp : CIR_Op<"alloca", [ }]; let arguments = (ins + Optional<CIR_AnyFundamentalIntType>:$dynAllocSize, TypeAttr:$allocaType, StrAttr:$name, UnitAttr:$init, @@ -372,16 +378,29 @@ def CIR_AllocaOp : CIR_Op<"alloca", [ OpBuilder<(ins "mlir::Type":$addr, "mlir::Type":$allocaType, "llvm::StringRef":$name, - "mlir::IntegerAttr":$alignment)> + "mlir::IntegerAttr":$alignment)>, + + OpBuilder<(ins "mlir::Type":$addr, + "mlir::Type":$allocaType, + "llvm::StringRef":$name, + "mlir::IntegerAttr":$alignment, + "mlir::Value":$dynAllocSize), + [{ + if (dynAllocSize) + $_state.addOperands(dynAllocSize); + build($_builder, $_state, addr, allocaType, name, alignment); + }]> ]; let extraClassDeclaration = [{ // Whether the alloca input type is a pointer. bool isPointerType() { return ::mlir::isa<::cir::PointerType>(getAllocaType()); } + bool isDynamic() { return (bool)getDynAllocSize(); } }]; let assemblyFormat = [{ $allocaType `,` qualified(type($addr)) `,` + ($dynAllocSize^ `:` type($dynAllocSize) `,`)? `[` $name (`,` `init` $init^)? (`,` `const` $constant^)? @@ -3808,6 +3827,26 @@ def CIR_ACosOp : CIR_UnaryFPToFPBuiltinOp<"acos", "ACosOp"> { }]; } +def CIR_ASinOp : CIR_UnaryFPToFPBuiltinOp<"asin", "ASinOp"> { + let summary = "Computes the arcus sine of the specified value"; + let description = [{ + `cir.asin`computes the arcus sine of a given value and + returns a result of the same type. + + Floating-point exceptions are ignored, and it does not set `errno`. 
+ }]; +} + +def CIR_ATanOp : CIR_UnaryFPToFPBuiltinOp<"atan", "ATanOp"> { + let summary = "Computes the floating-point arcus tangent value"; + let description = [{ + `cir.atan` computes the arcus tangent of a floating-point operand + and returns a result of the same type. + + Floating-point exceptions are ignored, and it does not set `errno`. + }]; +} + def CIR_FAbsOp : CIR_UnaryFPToFPBuiltinOp<"fabs", "FAbsOp"> { let summary = "Computes the floating-point absolute value"; let description = [{ diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 948e3fe..52d5f8a 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -62,7 +62,6 @@ struct MissingFeatures { static bool opAllocaEscapeByReference() { return false; } static bool opAllocaReference() { return false; } static bool opAllocaAnnotations() { return false; } - static bool opAllocaDynAllocSize() { return false; } static bool opAllocaCaptureByInit() { return false; } // FuncOp handling @@ -245,6 +244,7 @@ struct MissingFeatures { static bool moduleNameHash() { return false; } static bool msabi() { return false; } static bool needsGlobalCtorDtor() { return false; } + static bool nrvo() { return false; } static bool objCBlocks() { return false; } static bool objCGC() { return false; } static bool objCLifetime() { return false; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 718808d..a7c514e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3281,7 +3281,8 @@ defm declspec : BoolOption<"f", "declspec", def fmodules_cache_path : Joined<["-"], "fmodules-cache-path=">, Group<i_Group>, Flags<[]>, Visibility<[ClangOption, CC1Option]>, MetaVarName<"<directory>">, - HelpText<"Specify the module cache path">; + HelpText<"Specify the module cache path">, + MarshallingInfoString<HeaderSearchOpts<"ModuleCachePath">>; def 
fmodules_user_build_path : Separate<["-"], "fmodules-user-build-path">, Group<i_Group>, Flags<[]>, Visibility<[ClangOption, CC1Option]>, MetaVarName<"<directory>">, @@ -4789,6 +4790,18 @@ def gembed_source : Flag<["-"], "gembed-source">, Group<g_flags_Group>, def gno_embed_source : Flag<["-"], "gno-embed-source">, Group<g_flags_Group>, Flags<[NoXarchOption]>, HelpText<"Restore the default behavior of not embedding source text in DWARF debug sections">; +defm structor_decl_linkage_names + : BoolGOption<"structor-decl-linkage-names", + CodeGenOpts<"DebugStructorDeclLinkageNames">, DefaultTrue, + NegFlag<SetFalse>, + PosFlag<SetTrue, [], [], + "Attach linkage names to C++ constructor/destructor " + "declarations in DWARF." + "Implies -g.">, + BothFlags<[], [ClangOption, CLOption, CC1Option]>>, + DocBrief<[{On some ABIs (e.g., Itanium), constructors and destructors may have multiple variants. Historically, when generating DWARF, Clang did not attach ``DW_AT_linkage_name``s to structor DIEs because there were multiple possible manglings (depending on the structor variant) that could be used. With ``-gstructor-decl-linkage-names``, for ABIs with structor variants, we attach a "unified" mangled name to structor declarations DIEs which debuggers can use to look up all the definitions for a structor declaration. E.g., a "unified" mangled name ``_ZN3FooC4Ev`` may have multiple definitions associated with it such as ``_ZN3FooC1Ev`` and ``_ZN3FooC2Ev``. + +Enabling this flag results in a better interactive debugging experience (both GDB and LLDB have support for understanding these "unified" linkage names). However, it comes with a significant increase in debug-info size (particularly the `.debug_str` section). 
As an escape hatch, users can disable this feature using ``-gno-structor-decl-linkage-names``.}]>; defm key_instructions : BoolGOption<"key-instructions", CodeGenOpts<"DebugKeyInstructions">, DefaultFalse, NegFlag<SetFalse>, PosFlag<SetTrue, [], [], @@ -9406,6 +9419,8 @@ def dxc_Fo : DXCJoinedOrSeparate<"Fo">, HelpText<"Output object file">; def dxc_Fc : DXCJoinedOrSeparate<"Fc">, HelpText<"Output assembly listing file">; +def dxc_Frs : DXCJoinedOrSeparate<"Frs">, + HelpText<"Output additional root signature object file">; def dxil_validator_version : Option<["/", "-"], "validator-version", KIND_SEPARATE>, Group<dxc_Group>, Flags<[HelpHidden]>, Visibility<[DXCOption, ClangOption, CC1Option]>, @@ -9432,7 +9447,8 @@ def target_profile : DXCJoinedOrSeparate<"T">, MetaVarName<"<profile>">, "cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, cs_6_7," "lib_6_3, lib_6_4, lib_6_5, lib_6_6, lib_6_7, lib_6_x," "ms_6_5, ms_6_6, ms_6_7," - "as_6_5, as_6_6, as_6_7">; + "as_6_5, as_6_6, as_6_7," + "rootsig_1_0, rootsig_1_1">; def emit_pristine_llvm : DXCFlag<"emit-pristine-llvm">, HelpText<"Emit pristine LLVM IR from the frontend by not running any LLVM passes at all." 
"Same as -S + -emit-llvm + -disable-llvm-passes.">; diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h index 2e0c8be..850aea4 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -986,6 +986,9 @@ void ApplyHeaderSearchOptions(HeaderSearch &HS, const LangOptions &Lang, const llvm::Triple &triple); +void normalizeModuleCachePath(FileManager &FileMgr, StringRef Path, + SmallVectorImpl<char> &NormalizedPath); + } // namespace clang #endif // LLVM_CLANG_LEX_HEADERSEARCH_H diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Parse/ParseHLSLRootSignature.h index c87e6637c..b06846f 100644 --- a/clang/include/clang/Parse/ParseHLSLRootSignature.h +++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h @@ -240,6 +240,8 @@ IdentifierInfo *ParseHLSLRootSignature(Sema &Actions, llvm::dxbc::RootSignatureVersion Version, StringLiteral *Signature); +void HandleRootSignatureTarget(Sema &S, StringRef EntryRootSig); + } // namespace hlsl } // namespace clang diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index a9a87fb..30edd30 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -5188,6 +5188,14 @@ private: ParseHLSLAnnotations(Attrs, EndLoc); } + struct ParsedSemantic { + StringRef Name = ""; + unsigned Index = 0; + bool Explicit = false; + }; + + ParsedSemantic ParseHLSLSemantic(); + void ParseHLSLAnnotations(ParsedAttributes &Attrs, SourceLocation *EndLoc = nullptr, bool CouldBeBitField = false); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 88b67ee..a7600ab 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -845,9 +845,16 @@ enum AttrName { Target, TargetClones, TargetVersion }; void inferNoReturnAttr(Sema &S, const Decl *D); +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored 
"-Wattributes" +#endif /// Sema - This implements semantic analysis and AST building for C. /// \nosubgrouping class Sema final : public SemaBase { +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif // Table of Contents // ----------------- // 1. Semantic Analysis (Sema.cpp) diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 5cbe1b6..b5ddca0 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -17,6 +17,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" +#include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/SourceLocation.h" #include "clang/Sema/SemaBase.h" #include "llvm/ADT/SmallVector.h" @@ -129,6 +130,7 @@ public: bool ActOnUninitializedVarDecl(VarDecl *D); void ActOnEndOfTranslationUnit(TranslationUnitDecl *TU); void CheckEntryPoint(FunctionDecl *FD); + bool isSemanticValid(FunctionDecl *FD, DeclaratorDecl *D); void CheckSemanticAnnotation(FunctionDecl *EntryPoint, const Decl *Param, const HLSLAnnotationAttr *AnnotationAttr); void DiagnoseAttrStageMismatch( @@ -157,6 +159,8 @@ public: RootSigOverrideIdent = DeclIdent; } + HLSLRootSignatureDecl *lookupRootSignatureOverrideDecl(DeclContext *DC) const; + // Returns true if any RootSignatureElement is invalid and a diagnostic was // produced bool @@ -166,16 +170,31 @@ public: void handleWaveSizeAttr(Decl *D, const ParsedAttr &AL); void handleVkConstantIdAttr(Decl *D, const ParsedAttr &AL); void handleVkBindingAttr(Decl *D, const ParsedAttr &AL); - void handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL); - void handleSV_GroupThreadIDAttr(Decl *D, const ParsedAttr &AL); - void handleSV_GroupIDAttr(Decl *D, const ParsedAttr &AL); - void handleSV_PositionAttr(Decl *D, const ParsedAttr &AL); void handlePackOffsetAttr(Decl *D, const ParsedAttr &AL); void handleShaderAttr(Decl *D, const ParsedAttr &AL); void handleResourceBindingAttr(Decl *D, const ParsedAttr &AL); void 
handleParamModifierAttr(Decl *D, const ParsedAttr &AL); bool handleResourceTypeAttr(QualType T, const ParsedAttr &AL); + template <typename T> + T *createSemanticAttr(const ParsedAttr &AL, + std::optional<unsigned> Location) { + T *Attr = ::new (getASTContext()) T(getASTContext(), AL); + if (Attr->isSemanticIndexable()) + Attr->setSemanticIndex(Location ? *Location : 0); + else if (Location.has_value()) { + Diag(Attr->getLocation(), diag::err_hlsl_semantic_indexing_not_supported) + << Attr->getAttrName()->getName(); + return nullptr; + } + + return Attr; + } + + void diagnoseSystemSemanticAttr(Decl *D, const ParsedAttr &AL, + std::optional<unsigned> Index); + void handleSemanticAttr(Decl *D, const ParsedAttr &AL); + void handleVkExtBuiltinInputAttr(Decl *D, const ParsedAttr &AL); bool CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index dca05b4..ed4c6b0 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -5316,7 +5316,8 @@ ASTContext::getTypedefType(ElaboratedTypeKeyword Keyword, } llvm::FoldingSetNodeID ID; - TypedefType::Profile(ID, Keyword, Qualifier, Decl, UnderlyingType); + TypedefType::Profile(ID, Keyword, Qualifier, Decl, + *TypeMatchesDeclOrNone ? 
QualType() : UnderlyingType); void *InsertPos = nullptr; if (FoldingSetPlaceholder<TypedefType> *Placeholder = @@ -14194,7 +14195,11 @@ static QualType getCommonNonSugarTypeNode(const ASTContext &Ctx, const Type *X, FunctionProtoType::ExtProtoInfo EPIX = FX->getExtProtoInfo(), EPIY = FY->getExtProtoInfo(); assert(EPIX.ExtInfo == EPIY.ExtInfo); - assert(EPIX.ExtParameterInfos == EPIY.ExtParameterInfos); + assert(!EPIX.ExtParameterInfos == !EPIY.ExtParameterInfos); + assert(!EPIX.ExtParameterInfos || + llvm::equal( + llvm::ArrayRef(EPIX.ExtParameterInfos, FX->getNumParams()), + llvm::ArrayRef(EPIY.ExtParameterInfos, FY->getNumParams()))); assert(EPIX.RefQualifier == EPIY.RefQualifier); assert(EPIX.TypeQuals == EPIY.TypeQuals); assert(EPIX.Variadic == EPIY.Variadic); diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index a213583..3f7db39 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -114,31 +114,25 @@ template <class Emitter> class LoopScope final { public: using LabelTy = typename Compiler<Emitter>::LabelTy; using OptLabelTy = typename Compiler<Emitter>::OptLabelTy; + using LabelInfo = typename Compiler<Emitter>::LabelInfo; - LoopScope(Compiler<Emitter> *Ctx, LabelTy BreakLabel, LabelTy ContinueLabel) - : Ctx(Ctx), OldBreakLabel(Ctx->BreakLabel), - OldContinueLabel(Ctx->ContinueLabel), - OldBreakVarScope(Ctx->BreakVarScope), - OldContinueVarScope(Ctx->ContinueVarScope) { - this->Ctx->BreakLabel = BreakLabel; - this->Ctx->ContinueLabel = ContinueLabel; - this->Ctx->BreakVarScope = this->Ctx->VarScope; - this->Ctx->ContinueVarScope = this->Ctx->VarScope; - } + LoopScope(Compiler<Emitter> *Ctx, const Stmt *Name, LabelTy BreakLabel, + LabelTy ContinueLabel) + : Ctx(Ctx) { +#ifndef NDEBUG + for (const LabelInfo &LI : Ctx->LabelInfoStack) + assert(LI.Name != Name); +#endif - ~LoopScope() { - this->Ctx->BreakLabel = OldBreakLabel; - this->Ctx->ContinueLabel = OldContinueLabel; - 
this->Ctx->ContinueVarScope = OldContinueVarScope; - this->Ctx->BreakVarScope = OldBreakVarScope; + this->Ctx->LabelInfoStack.emplace_back(Name, BreakLabel, ContinueLabel, + /*DefaultLabel=*/std::nullopt, + Ctx->VarScope); } + ~LoopScope() { this->Ctx->LabelInfoStack.pop_back(); } + private: Compiler<Emitter> *Ctx; - OptLabelTy OldBreakLabel; - OptLabelTy OldContinueLabel; - VariableScope<Emitter> *OldBreakVarScope; - VariableScope<Emitter> *OldContinueVarScope; }; // Sets the context for a switch scope, mapping labels. @@ -147,32 +141,30 @@ public: using LabelTy = typename Compiler<Emitter>::LabelTy; using OptLabelTy = typename Compiler<Emitter>::OptLabelTy; using CaseMap = typename Compiler<Emitter>::CaseMap; + using LabelInfo = typename Compiler<Emitter>::LabelInfo; + + SwitchScope(Compiler<Emitter> *Ctx, const Stmt *Name, CaseMap &&CaseLabels, + LabelTy BreakLabel, OptLabelTy DefaultLabel) + : Ctx(Ctx), OldCaseLabels(std::move(this->Ctx->CaseLabels)) { +#ifndef NDEBUG + for (const LabelInfo &LI : Ctx->LabelInfoStack) + assert(LI.Name != Name); +#endif - SwitchScope(Compiler<Emitter> *Ctx, CaseMap &&CaseLabels, LabelTy BreakLabel, - OptLabelTy DefaultLabel) - : Ctx(Ctx), OldBreakLabel(Ctx->BreakLabel), - OldDefaultLabel(this->Ctx->DefaultLabel), - OldCaseLabels(std::move(this->Ctx->CaseLabels)), - OldLabelVarScope(Ctx->BreakVarScope) { - this->Ctx->BreakLabel = BreakLabel; - this->Ctx->DefaultLabel = DefaultLabel; this->Ctx->CaseLabels = std::move(CaseLabels); - this->Ctx->BreakVarScope = this->Ctx->VarScope; + this->Ctx->LabelInfoStack.emplace_back(Name, BreakLabel, + /*ContinueLabel=*/std::nullopt, + DefaultLabel, Ctx->VarScope); } ~SwitchScope() { - this->Ctx->BreakLabel = OldBreakLabel; - this->Ctx->DefaultLabel = OldDefaultLabel; this->Ctx->CaseLabels = std::move(OldCaseLabels); - this->Ctx->BreakVarScope = OldLabelVarScope; + this->Ctx->LabelInfoStack.pop_back(); } private: Compiler<Emitter> *Ctx; - OptLabelTy OldBreakLabel; - OptLabelTy OldDefaultLabel; 
CaseMap OldCaseLabels; - VariableScope<Emitter> *OldLabelVarScope; }; template <class Emitter> class StmtExprScope final { @@ -2949,19 +2941,17 @@ bool Compiler<Emitter>::VisitMaterializeTemporaryExpr( if (!this->emitSetLocal(*SubExprT, LocalIndex, E)) return false; return this->emitGetPtrLocal(LocalIndex, E); - } else { + } - if (!this->checkLiteralType(SubExpr)) + if (!this->checkLiteralType(SubExpr)) + return false; + const Expr *Inner = E->getSubExpr()->skipRValueSubobjectAdjustments(); + if (UnsignedOrNone LocalIndex = + allocateLocal(E, Inner->getType(), E->getExtendingDecl())) { + InitLinkScope<Emitter> ILS(this, InitLink::Temp(*LocalIndex)); + if (!this->emitGetPtrLocal(*LocalIndex, E)) return false; - - const Expr *Inner = E->getSubExpr()->skipRValueSubobjectAdjustments(); - if (UnsignedOrNone LocalIndex = - allocateLocal(E, Inner->getType(), E->getExtendingDecl())) { - InitLinkScope<Emitter> ILS(this, InitLink::Temp(*LocalIndex)); - if (!this->emitGetPtrLocal(*LocalIndex, E)) - return false; - return this->visitInitializer(SubExpr) && this->emitFinishInit(E); - } + return this->visitInitializer(SubExpr) && this->emitFinishInit(E); } return false; } @@ -5687,7 +5677,8 @@ bool Compiler<Emitter>::visitWhileStmt(const WhileStmt *S) { LabelTy CondLabel = this->getLabel(); // Label before the condition. LabelTy EndLabel = this->getLabel(); // Label after the loop. 
- LoopScope<Emitter> LS(this, EndLabel, CondLabel); + LocalScope<Emitter> WholeLoopScope(this); + LoopScope<Emitter> LS(this, S, EndLabel, CondLabel); this->fallthrough(CondLabel); this->emitLabel(CondLabel); @@ -5717,8 +5708,7 @@ bool Compiler<Emitter>::visitWhileStmt(const WhileStmt *S) { return false; this->fallthrough(EndLabel); this->emitLabel(EndLabel); - - return true; + return WholeLoopScope.destroyLocals(); } template <class Emitter> bool Compiler<Emitter>::visitDoStmt(const DoStmt *S) { @@ -5728,7 +5718,8 @@ template <class Emitter> bool Compiler<Emitter>::visitDoStmt(const DoStmt *S) { LabelTy StartLabel = this->getLabel(); LabelTy EndLabel = this->getLabel(); LabelTy CondLabel = this->getLabel(); - LoopScope<Emitter> LS(this, EndLabel, CondLabel); + LocalScope<Emitter> WholeLoopScope(this); + LoopScope<Emitter> LS(this, S, EndLabel, CondLabel); this->fallthrough(StartLabel); this->emitLabel(StartLabel); @@ -5750,7 +5741,7 @@ template <class Emitter> bool Compiler<Emitter>::visitDoStmt(const DoStmt *S) { this->fallthrough(EndLabel); this->emitLabel(EndLabel); - return true; + return WholeLoopScope.destroyLocals(); } template <class Emitter> @@ -5764,19 +5755,21 @@ bool Compiler<Emitter>::visitForStmt(const ForStmt *S) { LabelTy EndLabel = this->getLabel(); LabelTy CondLabel = this->getLabel(); LabelTy IncLabel = this->getLabel(); - LoopScope<Emitter> LS(this, EndLabel, IncLabel); + LocalScope<Emitter> WholeLoopScope(this); if (Init && !this->visitStmt(Init)) return false; + // Start of the loop body { this->fallthrough(CondLabel); this->emitLabel(CondLabel); - // Start of loop body. 
LocalScope<Emitter> CondScope(this); - if (const DeclStmt *CondDecl = S->getConditionVariableDeclStmt()) + LoopScope<Emitter> LS(this, S, EndLabel, IncLabel); + if (const DeclStmt *CondDecl = S->getConditionVariableDeclStmt()) { if (!visitDeclStmt(CondDecl)) return false; + } if (Cond) { if (!this->visitBool(Cond)) @@ -5799,12 +5792,12 @@ bool Compiler<Emitter>::visitForStmt(const ForStmt *S) { return false; if (!this->jump(CondLabel)) return false; - // End of loop body. + // } End of loop body. this->emitLabel(EndLabel); // If we jumped out of the loop above, we still need to clean up the condition // scope. - return CondScope.destroyLocals(); + return CondScope.destroyLocals() && WholeLoopScope.destroyLocals(); } template <class Emitter> @@ -5820,7 +5813,8 @@ bool Compiler<Emitter>::visitCXXForRangeStmt(const CXXForRangeStmt *S) { LabelTy EndLabel = this->getLabel(); LabelTy CondLabel = this->getLabel(); LabelTy IncLabel = this->getLabel(); - LoopScope<Emitter> LS(this, EndLabel, IncLabel); + LocalScope<Emitter> WholeLoopScope(this); + LoopScope<Emitter> LS(this, S, EndLabel, IncLabel); // Emit declarations needed in the loop. 
if (Init && !this->visitStmt(Init)) @@ -5859,29 +5853,78 @@ bool Compiler<Emitter>::visitCXXForRangeStmt(const CXXForRangeStmt *S) { this->fallthrough(EndLabel); this->emitLabel(EndLabel); - return true; + return WholeLoopScope.destroyLocals(); } template <class Emitter> bool Compiler<Emitter>::visitBreakStmt(const BreakStmt *S) { - if (!BreakLabel) + if (LabelInfoStack.empty()) return false; - for (VariableScope<Emitter> *C = VarScope; C != BreakVarScope; + OptLabelTy TargetLabel = std::nullopt; + const Stmt *TargetLoop = S->getNamedLoopOrSwitch(); + const VariableScope<Emitter> *BreakScope = nullptr; + + if (!TargetLoop) { + for (const auto &LI : llvm::reverse(LabelInfoStack)) { + if (LI.BreakLabel) { + TargetLabel = *LI.BreakLabel; + BreakScope = LI.BreakOrContinueScope; + break; + } + } + } else { + for (auto LI : LabelInfoStack) { + if (LI.Name == TargetLoop) { + TargetLabel = *LI.BreakLabel; + BreakScope = LI.BreakOrContinueScope; + break; + } + } + } + + assert(TargetLabel); + + for (VariableScope<Emitter> *C = this->VarScope; C != BreakScope; C = C->getParent()) C->emitDestruction(); - return this->jump(*BreakLabel); + + return this->jump(*TargetLabel); } template <class Emitter> bool Compiler<Emitter>::visitContinueStmt(const ContinueStmt *S) { - if (!ContinueLabel) + if (LabelInfoStack.empty()) return false; - for (VariableScope<Emitter> *C = VarScope; - C && C->getParent() != ContinueVarScope; C = C->getParent()) + OptLabelTy TargetLabel = std::nullopt; + const Stmt *TargetLoop = S->getNamedLoopOrSwitch(); + const VariableScope<Emitter> *ContinueScope = nullptr; + + if (!TargetLoop) { + for (const auto &LI : llvm::reverse(LabelInfoStack)) { + if (LI.ContinueLabel) { + TargetLabel = *LI.ContinueLabel; + ContinueScope = LI.BreakOrContinueScope; + break; + } + } + } else { + for (auto LI : LabelInfoStack) { + if (LI.Name == TargetLoop) { + TargetLabel = *LI.ContinueLabel; + ContinueScope = LI.BreakOrContinueScope; + break; + } + } + } + assert(TargetLabel); 
+ + for (VariableScope<Emitter> *C = VarScope; C != ContinueScope; + C = C->getParent()) C->emitDestruction(); - return this->jump(*ContinueLabel); + + return this->jump(*TargetLabel); } template <class Emitter> @@ -5894,7 +5937,7 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) { LocalScope<Emitter> LS(this); LabelTy EndLabel = this->getLabel(); - OptLabelTy DefaultLabel = std::nullopt; + UnsignedOrNone DefaultLabel = std::nullopt; unsigned CondVar = this->allocateLocalPrimitive(Cond, CondT, /*IsConst=*/true); @@ -5955,7 +5998,8 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) { return false; } - SwitchScope<Emitter> SS(this, std::move(CaseLabels), EndLabel, DefaultLabel); + SwitchScope<Emitter> SS(this, S, std::move(CaseLabels), EndLabel, + DefaultLabel); if (!this->visitStmt(S->getBody())) return false; this->emitLabel(EndLabel); @@ -5971,7 +6015,18 @@ bool Compiler<Emitter>::visitCaseStmt(const CaseStmt *S) { template <class Emitter> bool Compiler<Emitter>::visitDefaultStmt(const DefaultStmt *S) { - this->emitLabel(*DefaultLabel); + if (LabelInfoStack.empty()) + return false; + + LabelTy DefaultLabel; + for (const LabelInfo &LI : llvm::reverse(LabelInfoStack)) { + if (LI.DefaultLabel) { + DefaultLabel = *LI.DefaultLabel; + break; + } + } + + this->emitLabel(DefaultLabel); return this->visitStmt(S->getSubStmt()); } diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index bb8c660..c97dc18 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -112,9 +112,23 @@ protected: // Aliases for types defined in the emitter. 
using LabelTy = typename Emitter::LabelTy; using AddrTy = typename Emitter::AddrTy; - using OptLabelTy = std::optional<LabelTy>; + using OptLabelTy = UnsignedOrNone; using CaseMap = llvm::DenseMap<const SwitchCase *, LabelTy>; + struct LabelInfo { + const Stmt *Name; + const VariableScope<Emitter> *BreakOrContinueScope; + OptLabelTy BreakLabel; + OptLabelTy ContinueLabel; + OptLabelTy DefaultLabel; + LabelInfo(const Stmt *Name, OptLabelTy BreakLabel, OptLabelTy ContinueLabel, + OptLabelTy DefaultLabel, + const VariableScope<Emitter> *BreakOrContinueScope) + : Name(Name), BreakOrContinueScope(BreakOrContinueScope), + BreakLabel(BreakLabel), ContinueLabel(ContinueLabel), + DefaultLabel(DefaultLabel) {} + }; + /// Current compilation context. Context &Ctx; /// Program to link to. @@ -443,17 +457,8 @@ protected: /// Switch case mapping. CaseMap CaseLabels; - - /// Scope to cleanup until when we see a break statement. - VariableScope<Emitter> *BreakVarScope = nullptr; - /// Point to break to. - OptLabelTy BreakLabel; - /// Scope to cleanup until when we see a continue statement. - VariableScope<Emitter> *ContinueVarScope = nullptr; - /// Point to continue to. - OptLabelTy ContinueLabel; - /// Default case label. - OptLabelTy DefaultLabel; + /// Stack of label information for loops and switch statements. + llvm::SmallVector<LabelInfo> LabelInfoStack; const FunctionDecl *CompilingFunction = nullptr; }; diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp index d0aa8d8..e349397 100644 --- a/clang/lib/AST/ByteCode/EvalEmitter.cpp +++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp @@ -331,18 +331,17 @@ bool EvalEmitter::emitDestroy(uint32_t I, const SourceInfo &Info) { /// This is what we do here. 
void EvalEmitter::updateGlobalTemporaries() { for (const auto &[E, Temp] : S.SeenGlobalTemporaries) { - if (UnsignedOrNone GlobalIndex = P.getGlobal(E)) { - const Pointer &Ptr = P.getPtrGlobal(*GlobalIndex); - APValue *Cached = Temp->getOrCreateValue(true); - - if (OptPrimType T = Ctx.classify(E->getType())) { - TYPE_SWITCH( - *T, { *Cached = Ptr.deref<T>().toAPValue(Ctx.getASTContext()); }); - } else { - if (std::optional<APValue> APV = - Ptr.toRValue(Ctx, Temp->getTemporaryExpr()->getType())) - *Cached = *APV; - } + UnsignedOrNone GlobalIndex = P.getGlobal(E); + assert(GlobalIndex); + const Pointer &Ptr = P.getPtrGlobal(*GlobalIndex); + APValue *Cached = Temp->getOrCreateValue(true); + if (OptPrimType T = Ctx.classify(E->getType())) { + TYPE_SWITCH(*T, + { *Cached = Ptr.deref<T>().toAPValue(Ctx.getASTContext()); }); + } else { + if (std::optional<APValue> APV = + Ptr.toRValue(Ctx, Temp->getTemporaryExpr()->getType())) + *Cached = *APV; } } S.SeenGlobalTemporaries.clear(); diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index b64ed8c..b961a41 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -792,18 +792,18 @@ bool CheckLocalLoad(InterpState &S, CodePtr OpPC, const Block *B) { bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { - // Block pointers are the only ones we can actually read from. - if (!Ptr.isBlockPointer()) { - if (Ptr.isZero()) { - const auto &Src = S.Current->getSource(OpPC); + if (Ptr.isZero()) { + const auto &Src = S.Current->getSource(OpPC); - if (Ptr.isField()) - S.FFDiag(Src, diag::note_constexpr_null_subobject) << CSK_Field; - else - S.FFDiag(Src, diag::note_constexpr_access_null) << AK; - } + if (Ptr.isField()) + S.FFDiag(Src, diag::note_constexpr_null_subobject) << CSK_Field; + else + S.FFDiag(Src, diag::note_constexpr_access_null) << AK; return false; } + // Block pointers are the only ones we can actually read from. 
+ if (!Ptr.isBlockPointer()) + return false; if (!Ptr.block()->isAccessible()) { if (!CheckLive(S, OpPC, Ptr, AK)) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 2da2202..9a7bd03 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -1527,15 +1527,12 @@ bool InitGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { template <PrimType Name, class T = typename PrimConv<Name>::T> bool InitGlobalTemp(InterpState &S, CodePtr OpPC, uint32_t I, const LifetimeExtendedTemporaryDecl *Temp) { - const Pointer &Ptr = S.P.getGlobal(I); - - const T Value = S.Stk.peek<T>(); - APValue APV = Value.toAPValue(S.getASTContext()); - APValue *Cached = Temp->getOrCreateValue(true); - *Cached = APV; + if (S.EvalMode == EvaluationMode::ConstantFold) + return false; + assert(Temp); + const Pointer &Ptr = S.P.getGlobal(I); assert(Ptr.getDeclDesc()->asExpr()); - S.SeenGlobalTemporaries.push_back( std::make_pair(Ptr.getDeclDesc()->asExpr(), Temp)); @@ -1549,20 +1546,14 @@ bool InitGlobalTemp(InterpState &S, CodePtr OpPC, uint32_t I, /// 3) Initialized global with index \I with that inline bool InitGlobalTempComp(InterpState &S, CodePtr OpPC, const LifetimeExtendedTemporaryDecl *Temp) { + if (S.EvalMode == EvaluationMode::ConstantFold) + return false; assert(Temp); - const Pointer &P = S.Stk.peek<Pointer>(); - APValue *Cached = Temp->getOrCreateValue(true); + const Pointer &Ptr = S.Stk.peek<Pointer>(); S.SeenGlobalTemporaries.push_back( - std::make_pair(P.getDeclDesc()->asExpr(), Temp)); - - if (std::optional<APValue> APV = - P.toRValue(S.getASTContext(), Temp->getTemporaryExpr()->getType())) { - *Cached = *APV; - return true; - } - - return false; + std::make_pair(Ptr.getDeclDesc()->asExpr(), Temp)); + return true; } template <PrimType Name, class T = typename PrimConv<Name>::T> diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a0dcdac..4461731 100644 --- 
a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -300,6 +300,9 @@ static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC, if (!CheckDummy(S, OpPC, StrPtr.block(), AK_Read)) return false; + if (!StrPtr.getFieldDesc()->isPrimitiveArray()) + return false; + assert(StrPtr.getFieldDesc()->isPrimitiveArray()); unsigned ElemSize = StrPtr.getFieldDesc()->getElemSize(); @@ -910,6 +913,9 @@ static bool interp__builtin_carryop(InterpState &S, CodePtr OpPC, APSInt RHS = popToAPSInt(S.Stk, RHST); APSInt LHS = popToAPSInt(S.Stk, LHST); + if (CarryOutPtr.isDummy()) + return false; + APSInt CarryOut; APSInt Result; @@ -2851,11 +2857,32 @@ static bool interp__builtin_elementwise_triop( return true; } - // Vector type. const auto *VecT = Arg0Type->castAs<VectorType>(); const PrimType &ElemT = *S.getContext().classify(VecT->getElementType()); unsigned NumElems = VecT->getNumElements(); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + // Vector + Vector + Scalar case. + if (!Arg2Type->isVectorType()) { + APSInt Op2 = popToAPSInt( + S.Stk, *S.getContext().classify(Call->getArg(2)->getType())); + + const Pointer &Op1 = S.Stk.pop<Pointer>(); + const Pointer &Op0 = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + for (unsigned I = 0; I != NumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + Dst.elem<T>(I) = static_cast<T>(APSInt( + Fn(Op0.elem<T>(I).toAPSInt(), Op1.elem<T>(I).toAPSInt(), Op2), + DestUnsigned)); + }); + } + Dst.initializeAllElements(); + return true; + } + + // Vector type. const Pointer &Op2 = S.Stk.pop<Pointer>(); const Pointer &Op1 = S.Stk.pop<Pointer>(); const Pointer &Op0 = S.Stk.pop<Pointer>(); @@ -3299,6 +3326,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return LHS.isSigned() ? 
LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + case clang::X86::BI__builtin_ia32_pavgb128: + case clang::X86::BI__builtin_ia32_pavgw128: + case clang::X86::BI__builtin_ia32_pavgb256: + case clang::X86::BI__builtin_ia32_pavgw256: + case clang::X86::BI__builtin_ia32_pavgb512: + case clang::X86::BI__builtin_ia32_pavgw512: + return interp__builtin_elementwise_int_binop(S, OpPC, Call, + llvm::APIntOps::avgCeilU); + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: @@ -3314,8 +3350,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_psllv2di: case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: + case clang::X86::BI__builtin_ia32_psllv8di: + case clang::X86::BI__builtin_ia32_psllv8hi: case clang::X86::BI__builtin_ia32_psllv8si: + case clang::X86::BI__builtin_ia32_psllv16hi: case clang::X86::BI__builtin_ia32_psllv16si: + case clang::X86::BI__builtin_ia32_psllv32hi: case clang::X86::BI__builtin_ia32_psllwi128: case clang::X86::BI__builtin_ia32_psllwi256: case clang::X86::BI__builtin_ia32_psllwi512: @@ -3334,8 +3374,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, }); case clang::X86::BI__builtin_ia32_psrav4si: + case clang::X86::BI__builtin_ia32_psrav8di: + case clang::X86::BI__builtin_ia32_psrav8hi: case clang::X86::BI__builtin_ia32_psrav8si: + case clang::X86::BI__builtin_ia32_psrav16hi: case clang::X86::BI__builtin_ia32_psrav16si: + case clang::X86::BI__builtin_ia32_psrav32hi: + case clang::X86::BI__builtin_ia32_psravq128: + case clang::X86::BI__builtin_ia32_psravq256: case clang::X86::BI__builtin_ia32_psrawi128: case clang::X86::BI__builtin_ia32_psrawi256: case clang::X86::BI__builtin_ia32_psrawi512: @@ -3356,8 +3402,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_psrlv2di: case 
clang::X86::BI__builtin_ia32_psrlv4di: case clang::X86::BI__builtin_ia32_psrlv4si: + case clang::X86::BI__builtin_ia32_psrlv8di: + case clang::X86::BI__builtin_ia32_psrlv8hi: case clang::X86::BI__builtin_ia32_psrlv8si: + case clang::X86::BI__builtin_ia32_psrlv16hi: case clang::X86::BI__builtin_ia32_psrlv16si: + case clang::X86::BI__builtin_ia32_psrlv32hi: case clang::X86::BI__builtin_ia32_psrlwi128: case clang::X86::BI__builtin_ia32_psrlwi256: case clang::X86::BI__builtin_ia32_psrlwi512: @@ -3421,6 +3471,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return F; }); + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: + return interp__builtin_elementwise_triop( + S, OpPC, Call, + [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) { + return llvm::APIntOps::fshl(Hi, Lo, Amt); + }); + + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: + // NOTE: Reversed Hi/Lo operands. 
+ return interp__builtin_elementwise_triop( + S, OpPC, Call, + [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) { + return llvm::APIntOps::fshr(Hi, Lo, Amt); + }); + case clang::X86::BI__builtin_ia32_blendvpd: case clang::X86::BI__builtin_ia32_blendvpd256: case clang::X86::BI__builtin_ia32_blendvps: diff --git a/clang/lib/AST/ByteCode/InterpState.cpp b/clang/lib/AST/ByteCode/InterpState.cpp index 1ec4191..a95916c 100644 --- a/clang/lib/AST/ByteCode/InterpState.cpp +++ b/clang/lib/AST/ByteCode/InterpState.cpp @@ -25,6 +25,7 @@ InterpState::InterpState(State &Parent, Program &P, InterpStack &Stk, CheckingPotentialConstantExpression = Parent.CheckingPotentialConstantExpression; CheckingForUndefinedBehavior = Parent.CheckingForUndefinedBehavior; + EvalMode = Parent.EvalMode; } InterpState::InterpState(State &Parent, Program &P, InterpStack &Stk, @@ -36,6 +37,7 @@ InterpState::InterpState(State &Parent, Program &P, InterpStack &Stk, CheckingPotentialConstantExpression = Parent.CheckingPotentialConstantExpression; CheckingForUndefinedBehavior = Parent.CheckingForUndefinedBehavior; + EvalMode = Parent.EvalMode; } bool InterpState::inConstantContext() const { diff --git a/clang/lib/AST/ByteCode/State.h b/clang/lib/AST/ByteCode/State.h index 387ce39..a834eed 100644 --- a/clang/lib/AST/ByteCode/State.h +++ b/clang/lib/AST/ByteCode/State.h @@ -50,6 +50,27 @@ enum CheckSubobjectKind { CSK_VectorElement }; +enum class EvaluationMode { + /// Evaluate as a constant expression. Stop if we find that the expression + /// is not a constant expression. + ConstantExpression, + + /// Evaluate as a constant expression. Stop if we find that the expression + /// is not a constant expression. Some expressions can be retried in the + /// optimizer if we don't constant fold them here, but in an unevaluated + /// context we try to fold them immediately since the optimizer never + /// gets a chance to look at it. 
+ ConstantExpressionUnevaluated, + + /// Fold the expression to a constant. Stop if we hit a side-effect that + /// we can't model. + ConstantFold, + + /// Evaluate in any way we know how. Don't worry about side-effects that + /// can't be modeled. + IgnoreSideEffects, +}; + namespace interp { class Frame; class SourceInfo; @@ -149,6 +170,8 @@ public: /// is set; this is used when evaluating ICEs in C. bool CheckingForUndefinedBehavior = false; + EvaluationMode EvalMode; + private: void addCallStack(unsigned Limit); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ca93073..5145896 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -186,7 +186,7 @@ namespace { /// Find the path length and type of the most-derived subobject in the given /// path, and find the size of the containing array, if any. static unsigned - findMostDerivedSubobject(ASTContext &Ctx, APValue::LValueBase Base, + findMostDerivedSubobject(const ASTContext &Ctx, APValue::LValueBase Base, ArrayRef<APValue::LValuePathEntry> Path, uint64_t &ArraySize, QualType &Type, bool &IsArray, bool &FirstEntryIsUnsizedArray) { @@ -286,7 +286,7 @@ namespace { MostDerivedPathLength(0), MostDerivedArraySize(0), MostDerivedType(T.isNull() ? QualType() : T.getNonReferenceType()) {} - SubobjectDesignator(ASTContext &Ctx, const APValue &V) + SubobjectDesignator(const ASTContext &Ctx, const APValue &V) : Invalid(!V.isLValue() || !V.hasLValuePath()), IsOnePastTheEnd(false), FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), MostDerivedPathLength(0), MostDerivedArraySize(0) { @@ -926,27 +926,6 @@ namespace { /// fold (not just why it's not strictly a constant expression)? bool HasFoldFailureDiagnostic; - enum EvaluationMode { - /// Evaluate as a constant expression. Stop if we find that the expression - /// is not a constant expression. - EM_ConstantExpression, - - /// Evaluate as a constant expression. 
Stop if we find that the expression - /// is not a constant expression. Some expressions can be retried in the - /// optimizer if we don't constant fold them here, but in an unevaluated - /// context we try to fold them immediately since the optimizer never - /// gets a chance to look at it. - EM_ConstantExpressionUnevaluated, - - /// Fold the expression to a constant. Stop if we hit a side-effect that - /// we can't model. - EM_ConstantFold, - - /// Evaluate in any way we know how. Don't worry about side-effects that - /// can't be modeled. - EM_IgnoreSideEffects, - } EvalMode; - EvalInfo(const ASTContext &C, Expr::EvalStatus &S, EvaluationMode Mode) : Ctx(const_cast<ASTContext &>(C)), EvalStatus(S), CurrentCall(nullptr), CallStackDepth(0), NextCallIndex(1), @@ -957,7 +936,9 @@ namespace { /*CallExpr=*/nullptr, CallRef()), EvaluatingDecl((const ValueDecl *)nullptr), EvaluatingDeclValue(nullptr), HasActiveDiagnostic(false), - HasFoldFailureDiagnostic(false), EvalMode(Mode) {} + HasFoldFailureDiagnostic(false) { + EvalMode = Mode; + } ~EvalInfo() { discardCleanups(); @@ -1132,18 +1113,18 @@ namespace { // unless we require this evaluation to produce a constant expression. // // FIXME: We might want to show both diagnostics to the user in - // EM_ConstantFold mode. + // EvaluationMode::ConstantFold mode. bool hasPriorDiagnostic() override { if (!EvalStatus.Diag->empty()) { switch (EvalMode) { - case EM_ConstantFold: - case EM_IgnoreSideEffects: + case EvaluationMode::ConstantFold: + case EvaluationMode::IgnoreSideEffects: if (!HasFoldFailureDiagnostic) break; // We've already failed to fold something. Keep that diagnostic. [[fallthrough]]; - case EM_ConstantExpression: - case EM_ConstantExpressionUnevaluated: + case EvaluationMode::ConstantExpression: + case EvaluationMode::ConstantExpressionUnevaluated: setActiveDiagnostic(false); return true; } @@ -1158,12 +1139,12 @@ namespace { /// couldn't model? 
bool keepEvaluatingAfterSideEffect() const override { switch (EvalMode) { - case EM_IgnoreSideEffects: + case EvaluationMode::IgnoreSideEffects: return true; - case EM_ConstantExpression: - case EM_ConstantExpressionUnevaluated: - case EM_ConstantFold: + case EvaluationMode::ConstantExpression: + case EvaluationMode::ConstantExpressionUnevaluated: + case EvaluationMode::ConstantFold: // By default, assume any side effect might be valid in some other // evaluation of this expression from a different context. return checkingPotentialConstantExpression() || @@ -1182,12 +1163,12 @@ namespace { /// Should we continue evaluation after encountering undefined behavior? bool keepEvaluatingAfterUndefinedBehavior() { switch (EvalMode) { - case EM_IgnoreSideEffects: - case EM_ConstantFold: + case EvaluationMode::IgnoreSideEffects: + case EvaluationMode::ConstantFold: return true; - case EM_ConstantExpression: - case EM_ConstantExpressionUnevaluated: + case EvaluationMode::ConstantExpression: + case EvaluationMode::ConstantExpressionUnevaluated: return checkingForUndefinedBehavior(); } llvm_unreachable("Missed EvalMode case"); @@ -1208,10 +1189,10 @@ namespace { return false; switch (EvalMode) { - case EM_ConstantExpression: - case EM_ConstantExpressionUnevaluated: - case EM_ConstantFold: - case EM_IgnoreSideEffects: + case EvaluationMode::ConstantExpression: + case EvaluationMode::ConstantExpressionUnevaluated: + case EvaluationMode::ConstantFold: + case EvaluationMode::IgnoreSideEffects: return checkingPotentialConstantExpression() || checkingForUndefinedBehavior(); } @@ -1261,7 +1242,7 @@ namespace { EvalInfo &Info; bool Enabled; bool HadNoPriorDiags; - EvalInfo::EvaluationMode OldMode; + EvaluationMode OldMode; explicit FoldConstant(EvalInfo &Info, bool Enabled) : Info(Info), @@ -1271,7 +1252,7 @@ namespace { !Info.EvalStatus.HasSideEffects), OldMode(Info.EvalMode) { if (Enabled) - Info.EvalMode = EvalInfo::EM_ConstantFold; + Info.EvalMode = EvaluationMode::ConstantFold; } 
void keepDiagnostics() { Enabled = false; } ~FoldConstant() { @@ -1286,10 +1267,10 @@ namespace { /// side-effects. struct IgnoreSideEffectsRAII { EvalInfo &Info; - EvalInfo::EvaluationMode OldMode; + EvaluationMode OldMode; explicit IgnoreSideEffectsRAII(EvalInfo &Info) : Info(Info), OldMode(Info.EvalMode) { - Info.EvalMode = EvalInfo::EM_IgnoreSideEffects; + Info.EvalMode = EvaluationMode::IgnoreSideEffects; } ~IgnoreSideEffectsRAII() { Info.EvalMode = OldMode; } @@ -1589,7 +1570,7 @@ namespace { if (AllowConstexprUnknown) V.setConstexprUnknown(); } - void setFrom(ASTContext &Ctx, const APValue &V) { + void setFrom(const ASTContext &Ctx, const APValue &V) { assert(V.isLValue() && "Setting LValue from a non-LValue?"); Base = V.getLValueBase(); Offset = V.getLValueOffset(); @@ -9188,7 +9169,7 @@ bool LValueExprEvaluator::VisitMaterializeTemporaryExpr( // value for use outside this evaluation. APValue *Value; if (E->getStorageDuration() == SD_Static) { - if (Info.EvalMode == EvalInfo::EM_ConstantFold) + if (Info.EvalMode == EvaluationMode::ConstantFold) return false; // FIXME: What about SD_Thread? Value = E->getOrCreateValue(true); @@ -11694,6 +11675,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return LHS.isSigned() ? 
LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + case clang::X86::BI__builtin_ia32_pavgb128: + case clang::X86::BI__builtin_ia32_pavgw128: + case clang::X86::BI__builtin_ia32_pavgb256: + case clang::X86::BI__builtin_ia32_pavgw256: + case clang::X86::BI__builtin_ia32_pavgb512: + case clang::X86::BI__builtin_ia32_pavgw512: + return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU); + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: @@ -11707,8 +11696,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_psllv2di: case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: + case clang::X86::BI__builtin_ia32_psllv8di: + case clang::X86::BI__builtin_ia32_psllv8hi: case clang::X86::BI__builtin_ia32_psllv8si: + case clang::X86::BI__builtin_ia32_psllv16hi: case clang::X86::BI__builtin_ia32_psllv16si: + case clang::X86::BI__builtin_ia32_psllv32hi: case clang::X86::BI__builtin_ia32_psllwi128: case clang::X86::BI__builtin_ia32_pslldi128: case clang::X86::BI__builtin_ia32_psllqi128: @@ -11726,8 +11719,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { }); case clang::X86::BI__builtin_ia32_psrav4si: + case clang::X86::BI__builtin_ia32_psrav8di: + case clang::X86::BI__builtin_ia32_psrav8hi: case clang::X86::BI__builtin_ia32_psrav8si: + case clang::X86::BI__builtin_ia32_psrav16hi: case clang::X86::BI__builtin_ia32_psrav16si: + case clang::X86::BI__builtin_ia32_psrav32hi: + case clang::X86::BI__builtin_ia32_psravq128: + case clang::X86::BI__builtin_ia32_psravq256: case clang::X86::BI__builtin_ia32_psrawi128: case clang::X86::BI__builtin_ia32_psradi128: case clang::X86::BI__builtin_ia32_psraqi128: @@ -11747,8 +11746,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_psrlv2di: case clang::X86::BI__builtin_ia32_psrlv4di: case clang::X86::BI__builtin_ia32_psrlv4si: + 
case clang::X86::BI__builtin_ia32_psrlv8di: + case clang::X86::BI__builtin_ia32_psrlv8hi: case clang::X86::BI__builtin_ia32_psrlv8si: + case clang::X86::BI__builtin_ia32_psrlv16hi: case clang::X86::BI__builtin_ia32_psrlv16si: + case clang::X86::BI__builtin_ia32_psrlv32hi: case clang::X86::BI__builtin_ia32_psrlwi128: case clang::X86::BI__builtin_ia32_psrldi128: case clang::X86::BI__builtin_ia32_psrlqi128: @@ -11860,6 +11863,69 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: { + APValue SourceHi, SourceLo, SourceAmt; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceHi) || + !EvaluateAsRValue(Info, E->getArg(1), SourceLo) || + !EvaluateAsRValue(Info, E->getArg(2), SourceAmt)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned SourceLen = SourceHi.getVectorLength(); + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(SourceLen); + + APInt Amt = SourceAmt.getInt(); + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + APInt Hi = SourceHi.getVectorElt(EltNum).getInt(); + APInt Lo = SourceLo.getVectorElt(EltNum).getInt(); + APInt R = llvm::APIntOps::fshl(Hi, Lo, Amt); + ResultElements.push_back( + APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType()))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case 
X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: { + // NOTE: Reversed Hi/Lo operands. + APValue SourceHi, SourceLo, SourceAmt; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLo) || + !EvaluateAsRValue(Info, E->getArg(1), SourceHi) || + !EvaluateAsRValue(Info, E->getArg(2), SourceAmt)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned SourceLen = SourceHi.getVectorLength(); + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(SourceLen); + + APInt Amt = SourceAmt.getInt(); + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + APInt Hi = SourceHi.getVectorElt(EltNum).getInt(); + APInt Lo = SourceLo.getVectorElt(EltNum).getInt(); + APInt R = llvm::APIntOps::fshr(Hi, Lo, Amt); + ResultElements.push_back( + APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType()))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case X86::BI__builtin_ia32_blendvpd: case X86::BI__builtin_ia32_blendvpd256: case X86::BI__builtin_ia32_blendvps: @@ -13467,12 +13533,12 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, // Expression had no side effects, but we couldn't statically determine the // size of the referenced object. switch (Info.EvalMode) { - case EvalInfo::EM_ConstantExpression: - case EvalInfo::EM_ConstantFold: - case EvalInfo::EM_IgnoreSideEffects: + case EvaluationMode::ConstantExpression: + case EvaluationMode::ConstantFold: + case EvaluationMode::IgnoreSideEffects: // Leave it to IR generation. return Error(E); - case EvalInfo::EM_ConstantExpressionUnevaluated: + case EvaluationMode::ConstantExpressionUnevaluated: // Reduce it to a constant now. return Success((Type & 2) ? 
0 : -1, E); } @@ -17478,7 +17544,7 @@ bool Expr::EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); ExprTimeTraceScope TimeScope(this, Ctx, "EvaluateAsRValue"); - EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects); + EvalInfo Info(Ctx, Result, EvaluationMode::IgnoreSideEffects); Info.InConstantContext = InConstantContext; return ::EvaluateAsRValue(this, Result, Ctx, Info); } @@ -17499,7 +17565,7 @@ bool Expr::EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); ExprTimeTraceScope TimeScope(this, Ctx, "EvaluateAsInt"); - EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects); + EvalInfo Info(Ctx, Result, EvaluationMode::IgnoreSideEffects); Info.InConstantContext = InConstantContext; return ::EvaluateAsInt(this, Result, Ctx, AllowSideEffects, Info); } @@ -17510,7 +17576,7 @@ bool Expr::EvaluateAsFixedPoint(EvalResult &Result, const ASTContext &Ctx, assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); ExprTimeTraceScope TimeScope(this, Ctx, "EvaluateAsFixedPoint"); - EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects); + EvalInfo Info(Ctx, Result, EvaluationMode::IgnoreSideEffects); Info.InConstantContext = InConstantContext; return ::EvaluateAsFixedPoint(this, Result, Ctx, AllowSideEffects, Info); } @@ -17541,10 +17607,22 @@ bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx, "Expression evaluator can't be called on a dependent expression."); ExprTimeTraceScope TimeScope(this, Ctx, "EvaluateAsLValue"); - EvalInfo Info(Ctx, Result, EvalInfo::EM_ConstantFold); + EvalInfo Info(Ctx, Result, EvaluationMode::ConstantFold); Info.InConstantContext = InConstantContext; LValue LV; CheckedTemporaries CheckedTemps; + + if (Info.EnableNewConstInterp) { + if 
(!Info.Ctx.getInterpContext().evaluate(Info, this, Result.Val, + ConstantExprKind::Normal)) + return false; + + LV.setFrom(Ctx, Result.Val); + return CheckLValueConstantExpression( + Info, getExprLoc(), Ctx.getLValueReferenceType(getType()), LV, + ConstantExprKind::Normal, CheckedTemps); + } + if (!EvaluateLValue(this, LV, Info) || !Info.discardCleanups() || Result.HasSideEffects || !CheckLValueConstantExpression(Info, getExprLoc(), @@ -17561,8 +17639,8 @@ static bool EvaluateDestruction(const ASTContext &Ctx, APValue::LValueBase Base, SourceLocation Loc, Expr::EvalStatus &EStatus, bool IsConstantDestruction) { EvalInfo Info(Ctx, EStatus, - IsConstantDestruction ? EvalInfo::EM_ConstantExpression - : EvalInfo::EM_ConstantFold); + IsConstantDestruction ? EvaluationMode::ConstantExpression + : EvaluationMode::ConstantFold); Info.setEvaluatingDecl(Base, DestroyedValue, EvalInfo::EvaluatingDeclKind::Dtor); Info.InConstantContext = IsConstantDestruction; @@ -17590,7 +17668,7 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, const ASTContext &Ctx, return true; ExprTimeTraceScope TimeScope(this, Ctx, "EvaluateAsConstantExpr"); - EvalInfo::EvaluationMode EM = EvalInfo::EM_ConstantExpression; + EvaluationMode EM = EvaluationMode::ConstantExpression; EvalInfo Info(Ctx, Result, EM); Info.InConstantContext = true; @@ -17667,8 +17745,8 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, EvalInfo Info(Ctx, EStatus, (IsConstantInitialization && (Ctx.getLangOpts().CPlusPlus || Ctx.getLangOpts().C23)) - ? EvalInfo::EM_ConstantExpression - : EvalInfo::EM_ConstantFold); + ? 
EvaluationMode::ConstantExpression + : EvaluationMode::ConstantFold); Info.setEvaluatingDecl(VD, Value); Info.InConstantContext = IsConstantInitialization; @@ -17763,7 +17841,7 @@ APSInt Expr::EvaluateKnownConstInt(const ASTContext &Ctx, ExprTimeTraceScope TimeScope(this, Ctx, "EvaluateKnownConstInt"); EvalResult EVResult; EVResult.Diag = Diag; - EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); + EvalInfo Info(Ctx, EVResult, EvaluationMode::IgnoreSideEffects); Info.InConstantContext = true; bool Result = ::EvaluateAsRValue(this, EVResult, Ctx, Info); @@ -17782,7 +17860,7 @@ APSInt Expr::EvaluateKnownConstIntCheckOverflow( ExprTimeTraceScope TimeScope(this, Ctx, "EvaluateKnownConstIntCheckOverflow"); EvalResult EVResult; EVResult.Diag = Diag; - EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); + EvalInfo Info(Ctx, EVResult, EvaluationMode::IgnoreSideEffects); Info.InConstantContext = true; Info.CheckingForUndefinedBehavior = true; @@ -17802,7 +17880,7 @@ void Expr::EvaluateForOverflow(const ASTContext &Ctx) const { bool IsConst; EvalResult EVResult; if (!FastEvaluateAsRValue(this, EVResult.Val, Ctx, IsConst)) { - EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); + EvalInfo Info(Ctx, EVResult, EvaluationMode::IgnoreSideEffects); Info.CheckingForUndefinedBehavior = true; (void)::EvaluateAsRValue(Info, this, EVResult.Val); } @@ -17856,7 +17934,7 @@ static ICEDiag Worst(ICEDiag A, ICEDiag B) { return A.Kind >= B.Kind ? A : B; } static ICEDiag CheckEvalInICE(const Expr* E, const ASTContext &Ctx) { Expr::EvalResult EVResult; Expr::EvalStatus Status; - EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression); + EvalInfo Info(Ctx, Status, EvaluationMode::ConstantExpression); Info.InConstantContext = true; if (!::EvaluateAsRValue(E, EVResult, Ctx, Info) || EVResult.HasSideEffects || @@ -18340,7 +18418,7 @@ Expr::getIntegerConstantExpr(const ASTContext &Ctx) const { // value. 
EvalResult ExprResult; Expr::EvalStatus Status; - EvalInfo Info(Ctx, Status, EvalInfo::EM_IgnoreSideEffects); + EvalInfo Info(Ctx, Status, EvaluationMode::IgnoreSideEffects); Info.InConstantContext = true; if (!::EvaluateAsInt(this, ExprResult, Ctx, SE_AllowSideEffects, Info)) @@ -18376,7 +18454,7 @@ bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result) const { Expr::EvalStatus Status; SmallVector<PartialDiagnosticAt, 8> Diags; Status.Diag = &Diags; - EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression); + EvalInfo Info(Ctx, Status, EvaluationMode::ConstantExpression); bool IsConstExpr = ::EvaluateAsRValue(Info, this, Result ? *Result : Scratch) && @@ -18403,7 +18481,7 @@ bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx, }); Expr::EvalStatus Status; - EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpressionUnevaluated); + EvalInfo Info(Ctx, Status, EvaluationMode::ConstantExpressionUnevaluated); Info.InConstantContext = true; LValue ThisVal; @@ -18479,7 +18557,8 @@ bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, Expr::EvalStatus Status; Status.Diag = &Diags; - EvalInfo Info(FD->getASTContext(), Status, EvalInfo::EM_ConstantExpression); + EvalInfo Info(FD->getASTContext(), Status, + EvaluationMode::ConstantExpression); Info.InConstantContext = true; Info.CheckingPotentialConstantExpression = true; @@ -18529,7 +18608,7 @@ bool Expr::isPotentialConstantExprUnevaluated(Expr *E, Status.Diag = &Diags; EvalInfo Info(FD->getASTContext(), Status, - EvalInfo::EM_ConstantExpressionUnevaluated); + EvaluationMode::ConstantExpressionUnevaluated); Info.InConstantContext = true; Info.CheckingPotentialConstantExpression = true; @@ -18553,7 +18632,7 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, return false; Expr::EvalStatus Status; - EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); + EvalInfo Info(Ctx, Status, EvaluationMode::ConstantFold); return tryEvaluateBuiltinObjectSize(this, Type, Info, 
Result); } @@ -18611,7 +18690,7 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const { Expr::EvalStatus Status; - EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); + EvalInfo Info(Ctx, Status, EvaluationMode::ConstantFold); uint64_t Result; std::string StringResult; @@ -18626,7 +18705,7 @@ static bool EvaluateCharRangeAsStringImpl(const Expr *, T &Result, const Expr *PtrExpression, ASTContext &Ctx, Expr::EvalResult &Status) { - EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression); + EvalInfo Info(Ctx, Status, EvaluationMode::ConstantExpression); Info.InConstantContext = true; if (Info.EnableNewConstInterp) @@ -18694,7 +18773,7 @@ bool Expr::EvaluateCharRangeAsString(APValue &Result, bool Expr::tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const { Expr::EvalStatus Status; - EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); + EvalInfo Info(Ctx, Status, EvaluationMode::ConstantFold); if (Info.EnableNewConstInterp) return Info.Ctx.getInterpContext().evaluateStrlen(Info, this, Result); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index ffadfce..163cd43 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -6026,6 +6026,8 @@ void CXXNameMangler::mangleCXXCtorType(CXXCtorType T, // ::= CI2 <type> # base inheriting constructor // // In addition, C5 is a comdat name with C1 and C2 in it. + // C4 represents a ctor declaration and is used by debuggers to look up + // the various ctor variants. 
Out << 'C'; if (InheritedFrom) Out << 'I'; @@ -6036,6 +6038,9 @@ void CXXNameMangler::mangleCXXCtorType(CXXCtorType T, case Ctor_Base: Out << '2'; break; + case Ctor_Unified: + Out << '4'; + break; case Ctor_Comdat: Out << '5'; break; @@ -6053,6 +6058,8 @@ void CXXNameMangler::mangleCXXDtorType(CXXDtorType T) { // ::= D2 # base object destructor // // In addition, D5 is a comdat name with D1, D2 and, if virtual, D0 in it. + // D4 represents a dtor declaration and is used by debuggers to look up + // the various dtor variants. switch (T) { case Dtor_Deleting: Out << "D0"; @@ -6063,6 +6070,9 @@ void CXXNameMangler::mangleCXXDtorType(CXXDtorType T) { case Dtor_Base: Out << "D2"; break; + case Dtor_Unified: + Out << "D4"; + break; case Dtor_Comdat: Out << "D5"; break; diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index 0bfb51c..780b2c5 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -152,6 +152,37 @@ bool MangleContext::shouldMangleDeclName(const NamedDecl *D) { return shouldMangleCXXName(D); } +static llvm::StringRef g_lldb_func_call_label_prefix = "$__lldb_func:"; + +/// Given an LLDB function call label, this function prints the label +/// into \c Out, together with the structor type of \c GD (if the +/// decl is a constructor/destructor). LLDB knows how to handle mangled +/// names with this encoding. 
+/// +/// Example input label: +/// $__lldb_func::123:456:~Foo +/// +/// Example output: +/// $__lldb_func:D1:123:456:~Foo +/// +static void emitLLDBAsmLabel(llvm::StringRef label, GlobalDecl GD, + llvm::raw_ostream &Out) { + assert(label.starts_with(g_lldb_func_call_label_prefix)); + + Out << g_lldb_func_call_label_prefix; + + if (auto *Ctor = llvm::dyn_cast<clang::CXXConstructorDecl>(GD.getDecl())) { + Out << "C"; + if (Ctor->getInheritedConstructor().getConstructor()) + Out << "I"; + Out << GD.getCtorType(); + } else if (llvm::isa<clang::CXXDestructorDecl>(GD.getDecl())) { + Out << "D" << GD.getDtorType(); + } + + Out << label.substr(g_lldb_func_call_label_prefix.size()); +} + void MangleContext::mangleName(GlobalDecl GD, raw_ostream &Out) { const ASTContext &ASTContext = getASTContext(); const NamedDecl *D = cast<NamedDecl>(GD.getDecl()); @@ -185,7 +216,11 @@ void MangleContext::mangleName(GlobalDecl GD, raw_ostream &Out) { if (!UserLabelPrefix.empty()) Out << '\01'; // LLVM IR Marker for __asm("foo") - Out << ALA->getLabel(); + if (ALA->getLabel().starts_with(g_lldb_func_call_label_prefix)) + emitLLDBAsmLabel(ALA->getLabel(), GD, Out); + else + Out << ALA->getLabel(); + return; } diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 2ac38a2..d96472e 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1496,6 +1496,8 @@ void MicrosoftCXXNameMangler::mangleCXXDtorType(CXXDtorType T) { // it. case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT"); + case Dtor_Unified: + llvm_unreachable("not expecting a unified dtor type"); } llvm_unreachable("Unsupported dtor type?"); } diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp index 4b312c5..43f4e07 100644 --- a/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/clang/lib/AST/RecordLayoutBuilder.cpp @@ -187,6 +187,7 @@ void EmptySubobjectMap::ComputeEmptySubobjectSizes() { // Check the bases. 
for (const CXXBaseSpecifier &Base : Class->bases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + assert(BaseDecl != Class && "Class cannot inherit from itself."); CharUnits EmptySize; const ASTRecordLayout &Layout = Context.getASTRecordLayout(BaseDecl); diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index 45c307d..bb703ef 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -982,6 +982,20 @@ auto buildTransferMatchSwitch() { isOptionalMemberCallWithNameMatcher(hasName("isNull")), transferOptionalIsNullCall) + // NullableValue::makeValue, NullableValue::makeValueInplace + // Only NullableValue has these methods, but this + // will also pass for other types + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher( + hasAnyName("makeValue", "makeValueInplace")), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (RecordStorageLocation *Loc = + getImplicitObjectLocation(*E, State.Env)) { + setHasValue(*Loc, State.Env.getBoolLiteralValue(true), State.Env); + } + }) + // optional::emplace .CaseOfCFGStmt<CXXMemberCallExpr>( isOptionalMemberCallWithNameMatcher(hasName("emplace")), diff --git a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp index b840123..ed827ac 100644 --- a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp +++ b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp @@ -11,6 +11,9 @@ //===----------------------------------------------------------------------===// #include "clang/Analysis/FlowSensitive/RecordOps.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Type.h" #define DEBUG_TYPE "dataflow" @@ -49,25 +52,30 @@ static void copySyntheticField(QualType 
FieldType, StorageLocation &SrcFieldLoc, } void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst, - Environment &Env) { + Environment &Env, const QualType TypeToCopy) { auto SrcType = Src.getType().getCanonicalType().getUnqualifiedType(); auto DstType = Dst.getType().getCanonicalType().getUnqualifiedType(); auto SrcDecl = SrcType->getAsCXXRecordDecl(); auto DstDecl = DstType->getAsCXXRecordDecl(); - [[maybe_unused]] bool compatibleTypes = + const CXXRecordDecl *DeclToCopy = + TypeToCopy.isNull() ? nullptr : TypeToCopy->getAsCXXRecordDecl(); + + [[maybe_unused]] bool CompatibleTypes = SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr && - (SrcDecl->isDerivedFrom(DstDecl) || DstDecl->isDerivedFrom(SrcDecl))); + (SrcDecl->isDerivedFrom(DstDecl) || DstDecl->isDerivedFrom(SrcDecl) || + (DeclToCopy != nullptr && SrcDecl->isDerivedFrom(DeclToCopy) && + DstDecl->isDerivedFrom(DeclToCopy)))); LLVM_DEBUG({ - if (!compatibleTypes) { + if (!CompatibleTypes) { llvm::dbgs() << "Source type " << Src.getType() << "\n"; llvm::dbgs() << "Destination type " << Dst.getType() << "\n"; } }); - assert(compatibleTypes); + assert(CompatibleTypes); if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr && SrcDecl->isDerivedFrom(DstDecl))) { @@ -76,12 +84,24 @@ void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst, for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields()) copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name), *DstFieldLoc, Env); - } else { + } else if (SrcDecl != nullptr && DstDecl != nullptr && + DstDecl->isDerivedFrom(SrcDecl)) { for (auto [Field, SrcFieldLoc] : Src.children()) copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env); for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc, Dst.getSyntheticField(Name), Env); + } else { + for (const FieldDecl *Field : + 
Env.getDataflowAnalysisContext().getModeledFields(TypeToCopy)) { + copyField(*Field, Src.getChild(*Field), Dst.getChild(*Field), Dst, Env); + } + for (const auto &[SyntheticFieldName, SyntheticFieldType] : + Env.getDataflowAnalysisContext().getSyntheticFields(TypeToCopy)) { + copySyntheticField(SyntheticFieldType, + Src.getSyntheticField(SyntheticFieldName), + Dst.getSyntheticField(SyntheticFieldName), Env); + } } } diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index 86a816e..60371d9 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -20,14 +20,17 @@ #include "clang/AST/OperationKinds.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" #include "clang/Analysis/FlowSensitive/ASTOps.h" #include "clang/Analysis/FlowSensitive/AdornedCFG.h" #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/NoopAnalysis.h" #include "clang/Analysis/FlowSensitive/RecordOps.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/LLVM.h" #include "clang/Basic/OperatorKinds.h" #include "llvm/Support/Casting.h" #include <assert.h> @@ -287,7 +290,7 @@ public: } } - void VisitImplicitCastExpr(const ImplicitCastExpr *S) { + void VisitCastExpr(const CastExpr *S) { const Expr *SubExpr = S->getSubExpr(); assert(SubExpr != nullptr); @@ -317,6 +320,60 @@ public: break; } + case CK_BaseToDerived: { + // This is a cast of (single-layer) pointer or reference to a record type. + // We should now model the fields for the derived type. + + // Get the RecordStorageLocation for the record object underneath. 
+ RecordStorageLocation *Loc = nullptr; + if (S->getType()->isPointerType()) { + auto *PV = Env.get<PointerValue>(*SubExpr); + assert(PV != nullptr); + if (PV == nullptr) + break; + Loc = cast<RecordStorageLocation>(&PV->getPointeeLoc()); + } else { + assert(S->getType()->isRecordType()); + if (SubExpr->isGLValue()) { + Loc = Env.get<RecordStorageLocation>(*SubExpr); + } else { + Loc = &Env.getResultObjectLocation(*SubExpr); + } + } + if (!Loc) { + // Nowhere to add children or propagate from, so we're done. + break; + } + + // Get the derived record type underneath the reference or pointer. + QualType Derived = S->getType().getNonReferenceType(); + if (Derived->isPointerType()) { + Derived = Derived->getPointeeType(); + } + + // Add children to the storage location for fields (including synthetic + // fields) of the derived type and initialize their values. + for (const FieldDecl *Field : + Env.getDataflowAnalysisContext().getModeledFields(Derived)) { + assert(Field != nullptr); + QualType FieldType = Field->getType(); + if (FieldType->isReferenceType()) { + Loc->addChild(*Field, nullptr); + } else { + Loc->addChild(*Field, &Env.createStorageLocation(FieldType)); + } + + for (const auto &Entry : + Env.getDataflowAnalysisContext().getSyntheticFields(Derived)) { + Loc->addSyntheticField(Entry.getKey(), + Env.createStorageLocation(Entry.getValue())); + } + } + Env.initializeFieldsWithValues(*Loc, Derived); + + // Fall through to propagate SubExpr's StorageLocation to the CastExpr. + [[fallthrough]]; + } case CK_IntegralCast: // FIXME: This cast creates a new integral value from the // subexpression. But, because we don't model integers, we don't @@ -324,10 +381,9 @@ public: // modeling is added, then update this code to create a fresh location and // value. 
case CK_UncheckedDerivedToBase: + case CK_DerivedToBase: case CK_ConstructorConversion: case CK_UserDefinedConversion: - // FIXME: Add tests that excercise CK_UncheckedDerivedToBase, - // CK_ConstructorConversion, and CK_UserDefinedConversion. case CK_NoOp: { // FIXME: Consider making `Environment::getStorageLocation` skip noop // expressions (this and other similar expressions in the file) instead @@ -554,7 +610,15 @@ public: // Even if the copy/move constructor call is elidable, we choose to copy // the record in all cases (which isn't wrong, just potentially not // optimal). - copyRecord(*ArgLoc, Loc, Env); + // + // To handle cases of base class initializers in constructors, where a + // sibling derived class can be used to initialize a shared-base-class + // subobject through a DerivedToBase cast, intentionally copy only the + // parts of `ArgLoc` that are part of the base class being initialized. + // This is necessary because the type of `Loc` in these cases is the + // derived type ultimately being constructed, not the type of the base + // class subobject. 
+ copyRecord(*ArgLoc, Loc, Env, S->getType()); return; } @@ -684,15 +748,6 @@ public: propagateValue(*SubExpr, *S, Env); } - void VisitCXXStaticCastExpr(const CXXStaticCastExpr *S) { - if (S->getCastKind() == CK_NoOp) { - const Expr *SubExpr = S->getSubExpr(); - assert(SubExpr != nullptr); - - propagateValueOrStorageLocation(*SubExpr, *S, Env); - } - } - void VisitConditionalOperator(const ConditionalOperator *S) { const Environment *TrueEnv = StmtToEnv.getEnvironment(*S->getTrueExpr()); const Environment *FalseEnv = StmtToEnv.getEnvironment(*S->getFalseExpr()); diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index dbbf7f3..e687e54 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -50,6 +50,11 @@ struct Loan { Loan(LoanID id, AccessPath path, const Expr *IssueExpr) : ID(id), Path(path), IssueExpr(IssueExpr) {} + + void dump(llvm::raw_ostream &OS) const { + OS << ID << " (Path: "; + OS << Path.D->getNameAsString() << ")"; + } }; /// An Origin is a symbolic identifier that represents the set of possible @@ -120,17 +125,19 @@ public: // TODO: Mark this method as const once we remove the call to getOrCreate. OriginID get(const Expr &E) { - // Origin of DeclRefExpr is that of the declaration it refers to. + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + // If the expression itself has no specific origin, and it's a reference + // to a declaration, its origin is that of the declaration it refers to. + // For pointer types, where we don't pre-emptively create an origin for the + // DeclRefExpr itself. if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) return get(*DRE->getDecl()); - auto It = ExprToOriginID.find(&E); // TODO: This should be an assert(It != ExprToOriginID.end()). The current // implementation falls back to getOrCreate to avoid crashing on // yet-unhandled pointer expressions, creating an empty origin for them. 
- if (It == ExprToOriginID.end()) - return getOrCreate(E); - - return It->second; + return getOrCreate(E); } OriginID get(const ValueDecl &D) { @@ -149,10 +156,6 @@ public: if (It != ExprToOriginID.end()) return It->second; - if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) { - // Origin of DeclRefExpr is that of the declaration it refers to. - return getOrCreate(*DRE->getDecl()); - } OriginID NewID = getNextOriginID(); addOrigin(NewID, E); ExprToOriginID[&E] = NewID; @@ -235,7 +238,8 @@ public: return nullptr; } - virtual void dump(llvm::raw_ostream &OS, const OriginManager &) const { + virtual void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { OS << "Fact (Kind: " << static_cast<int>(K) << ")\n"; } }; @@ -250,8 +254,11 @@ public: IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} LoanID getLoanID() const { return LID; } OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { - OS << "Issue (LoanID: " << getLoanID() << ", ToOrigin: "; + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const override { + OS << "Issue ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ", ToOrigin: "; OM.dump(getOriginID(), OS); OS << ")\n"; } @@ -270,8 +277,11 @@ public: LoanID getLoanID() const { return LID; } SourceLocation getExpiryLoc() const { return ExpiryLoc; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { - OS << "Expire (LoanID: " << getLoanID() << ")\n"; + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const override { + OS << "Expire ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ")\n"; } }; @@ -288,7 +298,8 @@ public: : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {} OriginID getDestOriginID() const { return OIDDest; } OriginID getSrcOriginID() const { return OIDSrc; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const 
override { + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { OS << "AssignOrigin (Dest: "; OM.dump(getDestOriginID(), OS); OS << ", Src: "; @@ -307,7 +318,8 @@ public: ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { OS << "ReturnOfOrigin ("; OM.dump(getReturnedOriginID(), OS); OS << ")\n"; @@ -333,10 +345,11 @@ public: void markAsWritten() { IsWritten = true; } bool isWritten() const { return IsWritten; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { OS << "Use ("; OM.dump(getUsedOrigin(OM), OS); - OS << " " << (isWritten() ? "Write" : "Read") << ")\n"; + OS << ", " << (isWritten() ? 
"Write" : "Read") << ")\n"; } }; @@ -353,7 +366,8 @@ public: StringRef getAnnotation() const { return Annotation; } - void dump(llvm::raw_ostream &OS, const OriginManager &) const override { + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const override { OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; } }; @@ -392,7 +406,7 @@ public: if (It != BlockToFactsMap.end()) { for (const Fact *F : It->second) { llvm::dbgs() << " "; - F->dump(llvm::dbgs(), OriginMgr); + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); } } llvm::dbgs() << " End of Block\n"; @@ -438,12 +452,31 @@ public: void VisitDeclStmt(const DeclStmt *DS) { for (const Decl *D : DS->decls()) if (const auto *VD = dyn_cast<VarDecl>(D)) - if (hasOrigin(VD->getType())) + if (hasOrigin(VD)) if (const Expr *InitExpr = VD->getInit()) addAssignOriginFact(*VD, *InitExpr); } - void VisitDeclRefExpr(const DeclRefExpr *DRE) { handleUse(DRE); } + void VisitDeclRefExpr(const DeclRefExpr *DRE) { + handleUse(DRE); + // For non-pointer/non-view types, a reference to the variable's storage + // is a borrow. We create a loan for it. + // For pointer/view types, we stick to the existing model for now and do + // not create an extra origin for the l-value expression itself. + + // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is + // not sufficient to model the different levels of indirection. The current + // single-origin model cannot distinguish between a loan to the variable's + // storage and a loan to what it points to. A multi-origin model would be + // required for this. + if (!isPointerType(DRE->getType())) { + if (const Loan *L = createLoan(DRE)) { + OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); + CurrentBlockFacts.push_back( + FactMgr.createFact<IssueFact>(L->ID, ExprOID)); + } + } + } void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { /// TODO: Handle nullptr expr as a special 'null' loan. 
Uninitialized @@ -452,38 +485,31 @@ public: } void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { - if (!hasOrigin(ICE->getType())) + if (!hasOrigin(ICE)) return; // An ImplicitCastExpr node itself gets an origin, which flows from the // origin of its sub-expression (after stripping its own parens/casts). - // TODO: Consider if this is actually useful in practice. Alternatively, we - // could directly use the sub-expression's OriginID instead of creating a - // new one. addAssignOriginFact(*ICE, *ICE->getSubExpr()); } void VisitUnaryOperator(const UnaryOperator *UO) { if (UO->getOpcode() == UO_AddrOf) { const Expr *SubExpr = UO->getSubExpr(); - if (const auto *DRE = dyn_cast<DeclRefExpr>(SubExpr)) { - if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { - // Check if it's a local variable. - if (VD->hasLocalStorage()) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO); - AccessPath AddrOfLocalVarPath(VD); - const Loan &L = - FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, UO); - CurrentBlockFacts.push_back( - FactMgr.createFact<IssueFact>(L.ID, OID)); - } - } - } + // Taking address of a pointer-type expression is not yet supported and + // will be supported in multi-origin model. + if (isPointerType(SubExpr->getType())) + return; + // The origin of an address-of expression (e.g., &x) is the origin of + // its sub-expression (x). This fact will cause the dataflow analysis + // to propagate any loans held by the sub-expression's origin to the + // origin of this UnaryOperator expression. + addAssignOriginFact(*UO, *SubExpr); } } void VisitReturnStmt(const ReturnStmt *RS) { if (const Expr *RetExpr = RS->getRetValue()) { - if (hasOrigin(RetExpr->getType())) { + if (hasOrigin(RetExpr)) { OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); CurrentBlockFacts.push_back( FactMgr.createFact<ReturnOfOriginFact>(OID)); @@ -506,20 +532,6 @@ public: // expression. if (VisitTestPoint(FCE)) return; - // Visit as normal otherwise. 
- Base::VisitCXXFunctionalCastExpr(FCE); - } - -private: - // Check if a type has an origin. - bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } - - template <typename Destination, typename Source> - void addAssignOriginFact(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back( - FactMgr.createFact<AssignOriginFact>(DestOID, SrcOID)); } void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { @@ -544,6 +556,41 @@ private: } } +private: + static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType(); + } + + // Check if a type has an origin. + static bool hasOrigin(const Expr *E) { + return E->isGLValue() || isPointerType(E->getType()); + } + + static bool hasOrigin(const VarDecl *VD) { + return isPointerType(VD->getType()); + } + + /// Creates a loan for the storage path of a given declaration reference. + /// This function should be called whenever a DeclRefExpr represents a borrow. + /// \param DRE The declaration reference expression that initiates the borrow. + /// \return The new Loan on success, nullptr otherwise. + const Loan *createLoan(const DeclRefExpr *DRE) { + if (const auto *VD = dyn_cast<ValueDecl>(DRE->getDecl())) { + AccessPath Path(VD); + // The loan is created at the location of the DeclRefExpr. + return &FactMgr.getLoanMgr().addLoan(Path, DRE); + } + return nullptr; + } + + template <typename Destination, typename Source> + void addAssignOriginFact(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact<AssignOriginFact>(DestOID, SrcOID)); + } + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. /// If so, creates a `TestPointFact` and returns true. 
bool VisitTestPoint(const CXXFunctionalCastExpr *FCE) { @@ -566,25 +613,26 @@ private: } void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { + if (!hasOrigin(LHSExpr)) + return; // Find the underlying variable declaration for the left-hand side. if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr->IgnoreParenImpCasts())) { markUseAsWrite(DRE_LHS); if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl())) - if (hasOrigin(LHSExpr->getType())) - // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` - // LHS must be a pointer/reference type that can be an origin. - // RHS must also represent an origin (either another pointer/ref or an - // address-of). - addAssignOriginFact(*VD_LHS, *RHSExpr); + // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var`. + // LHS must be a pointer/reference type that can be an origin. RHS must + // also represent an origin (either another pointer/ref or an + // address-of). + addAssignOriginFact(*VD_LHS, *RHSExpr); } } - // A DeclRefExpr is a use of the referenced decl. It is checked for - // use-after-free unless it is being written to (e.g. on the left-hand side - // of an assignment). + // A DeclRefExpr will be treated as a use of the referenced decl. It will be + // checked for use-after-free unless it is later marked as being written to + // (e.g. on the left-hand side of an assignment). 
void handleUse(const DeclRefExpr *DRE) { - if (hasOrigin(DRE->getType())) { + if (isPointerType(DRE->getType())) { UseFact *UF = FactMgr.createFact<UseFact>(DRE); CurrentBlockFacts.push_back(UF); assert(!UseFacts.contains(DRE)); diff --git a/clang/lib/Analysis/LiveVariables.cpp b/clang/lib/Analysis/LiveVariables.cpp index 375fdb3..74b930b 100644 --- a/clang/lib/Analysis/LiveVariables.cpp +++ b/clang/lib/Analysis/LiveVariables.cpp @@ -18,6 +18,7 @@ #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" #include <optional> @@ -35,7 +36,7 @@ public: llvm::DenseMap<const CFGBlock *, LiveVariables::LivenessValues> blocksEndToLiveness; llvm::DenseMap<const CFGBlock *, LiveVariables::LivenessValues> blocksBeginToLiveness; llvm::DenseMap<const Stmt *, LiveVariables::LivenessValues> stmtsToLiveness; - llvm::DenseMap<const DeclRefExpr *, unsigned> inAssignment; + llvm::DenseSet<const DeclRefExpr *> inAssignment; const bool killAtAssign; LiveVariables::LivenessValues @@ -71,15 +72,17 @@ bool LiveVariables::LivenessValues::isLive(const Expr *E) const { bool LiveVariables::LivenessValues::isLive(const VarDecl *D) const { if (const auto *DD = dyn_cast<DecompositionDecl>(D)) { - bool alive = false; - for (const BindingDecl *BD : DD->bindings()) - alive |= liveBindings.contains(BD); - // Note: the only known case this condition is necessary, is when a bindig // to a tuple-like structure is created. The HoldingVar initializers have a // DeclRefExpr to the DecompositionDecl. 
- alive |= liveDecls.contains(DD); - return alive; + if (liveDecls.contains(DD)) + return true; + + for (const BindingDecl *BD : DD->bindings()) { + if (liveBindings.contains(BD)) + return true; + } + return false; } return liveDecls.contains(D); } @@ -90,8 +93,8 @@ namespace { if (A.isEmpty()) return B; - for (typename SET::iterator it = B.begin(), ei = B.end(); it != ei; ++it) { - A = A.add(*it); + for (const auto *Elem : B) { + A = A.add(Elem); } return A; } @@ -127,8 +130,9 @@ LiveVariablesImpl::merge(LiveVariables::LivenessValues valsA, BSetRefA.asImmutableSet()); } -bool LiveVariables::LivenessValues::equals(const LivenessValues &V) const { - return liveExprs == V.liveExprs && liveDecls == V.liveDecls; +bool LiveVariables::LivenessValues::operator==(const LivenessValues &V) const { + return liveExprs == V.liveExprs && liveDecls == V.liveDecls && + liveBindings == V.liveBindings; } //===----------------------------------------------------------------------===// @@ -174,7 +178,6 @@ public: void VisitDeclStmt(DeclStmt *DS); void VisitObjCForCollectionStmt(ObjCForCollectionStmt *OS); void VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *UE); - void VisitUnaryOperator(UnaryOperator *UO); void Visit(Stmt *S); }; } // namespace @@ -370,7 +373,7 @@ static bool writeShouldKill(const VarDecl *VD) { void TransferFunctions::VisitBinaryOperator(BinaryOperator *B) { if (LV.killAtAssign && B->getOpcode() == BO_Assign) { if (const auto *DR = dyn_cast<DeclRefExpr>(B->getLHS()->IgnoreParens())) { - LV.inAssignment[DR] = 1; + LV.inAssignment.insert(DR); } } if (B->isAssignmentOp()) { @@ -396,11 +399,7 @@ void TransferFunctions::VisitBinaryOperator(BinaryOperator *B) { Killed = writeShouldKill(VD); if (Killed) val.liveDecls = LV.DSetFact.remove(val.liveDecls, VD); - } - - if (Killed && observer) - observer->observerKill(DR); } } } @@ -416,7 +415,7 @@ void TransferFunctions::VisitBlockExpr(BlockExpr *BE) { void TransferFunctions::VisitDeclRefExpr(DeclRefExpr *DR) { const 
Decl* D = DR->getDecl(); - bool InAssignment = LV.inAssignment[DR]; + bool InAssignment = LV.inAssignment.contains(DR); if (const auto *BD = dyn_cast<BindingDecl>(D)) { if (!InAssignment) { if (const auto *HV = BD->getHoldingVar()) @@ -465,8 +464,6 @@ void TransferFunctions::VisitObjCForCollectionStmt(ObjCForCollectionStmt *OS) { if (VD) { val.liveDecls = LV.DSetFact.remove(val.liveDecls, VD); - if (observer && DR) - observer->observerKill(DR); } } @@ -486,32 +483,6 @@ VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *UE) } } -void TransferFunctions::VisitUnaryOperator(UnaryOperator *UO) { - // Treat ++/-- as a kill. - // Note we don't actually have to do anything if we don't have an observer, - // since a ++/-- acts as both a kill and a "use". - if (!observer) - return; - - switch (UO->getOpcode()) { - default: - return; - case UO_PostInc: - case UO_PostDec: - case UO_PreInc: - case UO_PreDec: - break; - } - - if (auto *DR = dyn_cast<DeclRefExpr>(UO->getSubExpr()->IgnoreParens())) { - const Decl *D = DR->getDecl(); - if (isa<VarDecl>(D) || isa<BindingDecl>(D)) { - // Treat ++/-- as a kill. - observer->observerKill(DR); - } - } -} - LiveVariables::LivenessValues LiveVariablesImpl::runOnBlock(const CFGBlock *block, LiveVariables::LivenessValues val, @@ -588,16 +559,15 @@ LiveVariables::computeLiveness(AnalysisDeclContext &AC, bool killAtAssign) { // Merge the values of all successor blocks. 
LivenessValues val; - for (CFGBlock::const_succ_iterator it = block->succ_begin(), - ei = block->succ_end(); it != ei; ++it) { - if (const CFGBlock *succ = *it) { + for (const CFGBlock *succ : block->succs()) { + if (succ) { val = LV->merge(val, LV->blocksBeginToLiveness[succ]); } } if (!everAnalyzedBlock[block->getBlockID()]) everAnalyzedBlock[block->getBlockID()] = true; - else if (prevVal.equals(val)) + else if (prevVal == val) continue; prevVal = val; @@ -618,38 +588,26 @@ void LiveVariables::dumpBlockLiveness(const SourceManager &M) { void LiveVariablesImpl::dumpBlockLiveness(const SourceManager &M) { std::vector<const CFGBlock *> vec; - for (const auto &KV : blocksEndToLiveness) { - vec.push_back(KV.first); - } + vec.reserve(blocksEndToLiveness.size()); + llvm::append_range(vec, llvm::make_first_range(blocksEndToLiveness)); llvm::sort(vec, [](const CFGBlock *A, const CFGBlock *B) { return A->getBlockID() < B->getBlockID(); }); std::vector<const VarDecl*> declVec; - for (std::vector<const CFGBlock *>::iterator - it = vec.begin(), ei = vec.end(); it != ei; ++it) { - llvm::errs() << "\n[ B" << (*it)->getBlockID() + for (const CFGBlock *block : vec) { + llvm::errs() << "\n[ B" << block->getBlockID() << " (live variables at block exit) ]\n"; - - LiveVariables::LivenessValues vals = blocksEndToLiveness[*it]; declVec.clear(); - - for (llvm::ImmutableSet<const VarDecl *>::iterator si = - vals.liveDecls.begin(), - se = vals.liveDecls.end(); si != se; ++si) { - declVec.push_back(*si); - } - + llvm::append_range(declVec, blocksEndToLiveness[block].liveDecls); llvm::sort(declVec, [](const Decl *A, const Decl *B) { return A->getBeginLoc() < B->getBeginLoc(); }); - for (std::vector<const VarDecl*>::iterator di = declVec.begin(), - de = declVec.end(); di != de; ++di) { - llvm::errs() << " " << (*di)->getDeclName().getAsString() - << " <"; - (*di)->getLocation().print(llvm::errs(), M); + for (const VarDecl *VD : declVec) { + llvm::errs() << " " << 
VD->getDeclName().getAsString() << " <"; + VD->getLocation().print(llvm::errs(), M); llvm::errs() << ">\n"; } } diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index 131170d..cee98d5 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -39,6 +39,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ImmutableMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" @@ -537,6 +538,13 @@ public: protected: friend class VarMapBuilder; + // Resolve any definition ID down to its non-reference base ID. + unsigned getCanonicalDefinitionID(unsigned ID) { + while (ID > 0 && VarDefinitions[ID].isReference()) + ID = VarDefinitions[ID].Ref; + return ID; + } + // Get the current context index unsigned getContextIndex() { return SavedContexts.size()-1; } @@ -621,6 +629,7 @@ public: void VisitDeclStmt(const DeclStmt *S); void VisitBinaryOperator(const BinaryOperator *BO); + void VisitCallExpr(const CallExpr *CE); }; } // namespace @@ -666,6 +675,56 @@ void VarMapBuilder::VisitBinaryOperator(const BinaryOperator *BO) { } } +// Invalidates local variable definitions if variable escaped. +void VarMapBuilder::VisitCallExpr(const CallExpr *CE) { + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return; + + // Heuristic for likely-benign functions that pass by mutable reference. This + // is needed to avoid a slew of false positives due to mutable reference + // passing where the captured reference is usually passed on by-value. + if (const IdentifierInfo *II = FD->getIdentifier()) { + // Any kind of std::bind-like functions. + if (II->isStr("bind") || II->isStr("bind_front")) + return; + } + + // Invalidate local variable definitions that are passed by non-const + // reference or non-const pointer. 
+ for (unsigned Idx = 0; Idx < CE->getNumArgs(); ++Idx) { + if (Idx >= FD->getNumParams()) + break; + + const Expr *Arg = CE->getArg(Idx)->IgnoreParenImpCasts(); + const ParmVarDecl *PVD = FD->getParamDecl(Idx); + QualType ParamType = PVD->getType(); + + // Potential reassignment if passed by non-const reference / pointer. + const ValueDecl *VDec = nullptr; + if (ParamType->isReferenceType() && + !ParamType->getPointeeType().isConstQualified()) { + if (const auto *DRE = dyn_cast<DeclRefExpr>(Arg)) + VDec = DRE->getDecl(); + } else if (ParamType->isPointerType() && + !ParamType->getPointeeType().isConstQualified()) { + Arg = Arg->IgnoreParenCasts(); + if (const auto *UO = dyn_cast<UnaryOperator>(Arg)) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubE = UO->getSubExpr()->IgnoreParenCasts(); + if (const auto *DRE = dyn_cast<DeclRefExpr>(SubE)) + VDec = DRE->getDecl(); + } + } + } + + if (VDec && Ctx.lookup(VDec)) { + Ctx = VMap->clearDefinition(VDec, Ctx); + VMap->saveContext(CE, Ctx); + } + } +} + // Computes the intersection of two contexts. The intersection is the // set of variables which have the same definition in both contexts; // variables with different definitions are discarded. @@ -674,11 +733,16 @@ LocalVariableMap::intersectContexts(Context C1, Context C2) { Context Result = C1; for (const auto &P : C1) { const NamedDecl *Dec = P.first; - const unsigned *i2 = C2.lookup(Dec); - if (!i2) // variable doesn't exist on second path + const unsigned *I2 = C2.lookup(Dec); + if (!I2) { + // The variable doesn't exist on second path. Result = removeDefinition(Dec, Result); - else if (*i2 != P.second) // variable exists, but has different definition + } else if (getCanonicalDefinitionID(P.second) != + getCanonicalDefinitionID(*I2)) { + // If canonical definitions mismatch the underlying definitions are + // different, invalidate. 
Result = clearDefinition(Dec, Result); + } } return Result; } @@ -698,13 +762,22 @@ LocalVariableMap::Context LocalVariableMap::createReferenceContext(Context C) { // createReferenceContext. void LocalVariableMap::intersectBackEdge(Context C1, Context C2) { for (const auto &P : C1) { - unsigned i1 = P.second; - VarDefinition *VDef = &VarDefinitions[i1]; + const unsigned I1 = P.second; + VarDefinition *VDef = &VarDefinitions[I1]; assert(VDef->isReference()); - const unsigned *i2 = C2.lookup(P.first); - if (!i2 || (*i2 != i1)) - VDef->Ref = 0; // Mark this variable as undefined + const unsigned *I2 = C2.lookup(P.first); + if (!I2) { + // Variable does not exist at the end of the loop, invalidate. + VDef->Ref = 0; + continue; + } + + // Compare the canonical IDs. This correctly handles chains of references + // and determines if the variable is truly loop-invariant. + if (getCanonicalDefinitionID(VDef->Ref) != getCanonicalDefinitionID(*I2)) { + VDef->Ref = 0; // Mark this variable as undefined + } } } @@ -1196,11 +1269,10 @@ public: void warnIfMutexNotHeld(const FactSet &FSet, const NamedDecl *D, const Expr *Exp, AccessKind AK, Expr *MutexExp, - ProtectedOperationKind POK, til::LiteralPtr *Self, + ProtectedOperationKind POK, til::SExpr *Self, SourceLocation Loc); void warnIfMutexHeld(const FactSet &FSet, const NamedDecl *D, const Expr *Exp, - Expr *MutexExp, til::LiteralPtr *Self, - SourceLocation Loc); + Expr *MutexExp, til::SExpr *Self, SourceLocation Loc); void checkAccess(const FactSet &FSet, const Expr *Exp, AccessKind AK, ProtectedOperationKind POK); @@ -1596,6 +1668,16 @@ void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result, const CFGBlockInfo *PredBlockInfo = &BlockInfo[PredBlock->getBlockID()]; const LocalVarContext &LVarCtx = PredBlockInfo->ExitContext; + // Temporarily set the lookup context for SExprBuilder. 
+ SxBuilder.setLookupLocalVarExpr([&](const NamedDecl *D) -> const Expr * { + if (!Handler.issueBetaWarnings()) + return nullptr; + auto Ctx = LVarCtx; + return LocalVarMap.lookupExpr(D, Ctx); + }); + auto Cleanup = llvm::make_scope_exit( + [this] { SxBuilder.setLookupLocalVarExpr(nullptr); }); + const auto *Exp = getTrylockCallExpr(Cond, LVarCtx, Negate); if (!Exp) return; @@ -1652,7 +1734,7 @@ class BuildLockset : public ConstStmtVisitor<BuildLockset> { } void handleCall(const Expr *Exp, const NamedDecl *D, - til::LiteralPtr *Self = nullptr, + til::SExpr *Self = nullptr, SourceLocation Loc = SourceLocation()); void examineArguments(const FunctionDecl *FD, CallExpr::const_arg_iterator ArgBegin, @@ -1664,7 +1746,17 @@ public: const FactSet &FunctionExitFSet) : ConstStmtVisitor<BuildLockset>(), Analyzer(Anlzr), FSet(Info.EntrySet), FunctionExitFSet(FunctionExitFSet), LVarCtx(Info.EntryContext), - CtxIndex(Info.EntryIndex) {} + CtxIndex(Info.EntryIndex) { + Analyzer->SxBuilder.setLookupLocalVarExpr( + [this](const NamedDecl *D) -> const Expr * { + if (!Analyzer->Handler.issueBetaWarnings()) + return nullptr; + auto Ctx = LVarCtx; + return Analyzer->LocalVarMap.lookupExpr(D, Ctx); + }); + } + + ~BuildLockset() { Analyzer->SxBuilder.setLookupLocalVarExpr(nullptr); } void VisitUnaryOperator(const UnaryOperator *UO); void VisitBinaryOperator(const BinaryOperator *BO); @@ -1682,7 +1774,7 @@ public: /// of at least the passed in AccessKind. void ThreadSafetyAnalyzer::warnIfMutexNotHeld( const FactSet &FSet, const NamedDecl *D, const Expr *Exp, AccessKind AK, - Expr *MutexExp, ProtectedOperationKind POK, til::LiteralPtr *Self, + Expr *MutexExp, ProtectedOperationKind POK, til::SExpr *Self, SourceLocation Loc) { LockKind LK = getLockKindFromAccessKind(AK); CapabilityExpr Cp = SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); @@ -1741,8 +1833,7 @@ void ThreadSafetyAnalyzer::warnIfMutexNotHeld( /// Warn if the LSet contains the given lock. 
void ThreadSafetyAnalyzer::warnIfMutexHeld(const FactSet &FSet, const NamedDecl *D, const Expr *Exp, - Expr *MutexExp, - til::LiteralPtr *Self, + Expr *MutexExp, til::SExpr *Self, SourceLocation Loc) { CapabilityExpr Cp = SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); if (Cp.isInvalid()) { @@ -1910,7 +2001,7 @@ void ThreadSafetyAnalyzer::checkPtAccess(const FactSet &FSet, const Expr *Exp, /// of an implicitly called cleanup function. /// \param Loc If \p Exp = nullptr, the location. void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, - til::LiteralPtr *Self, SourceLocation Loc) { + til::SExpr *Self, SourceLocation Loc) { CapExprSet ExclusiveLocksToAdd, SharedLocksToAdd; CapExprSet ExclusiveLocksToRemove, SharedLocksToRemove, GenericLocksToRemove; CapExprSet ScopedReqsAndExcludes; @@ -1922,7 +2013,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, const auto *TagT = Exp->getType()->getAs<TagType>(); if (D->hasAttrs() && TagT && Exp->isPRValue()) { til::LiteralPtr *Placeholder = - Analyzer->SxBuilder.createVariable(nullptr); + Analyzer->SxBuilder.createThisPlaceholder(); [[maybe_unused]] auto inserted = Analyzer->ConstructedObjects.insert({Exp, Placeholder}); assert(inserted.second && "Are we visiting the same expression again?"); @@ -2216,6 +2307,9 @@ void BuildLockset::examineArguments(const FunctionDecl *FD, } void BuildLockset::VisitCallExpr(const CallExpr *Exp) { + // adjust the context + LVarCtx = Analyzer->LocalVarMap.getNextContext(CtxIndex, Exp, LVarCtx); + if (const auto *CE = dyn_cast<CXXMemberCallExpr>(Exp)) { const auto *ME = dyn_cast<MemberExpr>(CE->getCallee()); // ME can be null when calling a method pointer @@ -2603,7 +2697,8 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { } if (UnderlyingLocks.empty()) continue; - CapabilityExpr Cp(SxBuilder.createVariable(Param), StringRef(), + CapabilityExpr Cp(SxBuilder.translateVariable(Param, nullptr), + StringRef(), /*Neg=*/false, 
/*Reentrant=*/false); auto *ScopedEntry = FactMan.createFact<ScopedLockableFactEntry>( Cp, Param->getLocation(), FactEntry::Declared, @@ -2721,17 +2816,19 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { if (!DD->hasAttrs()) break; - LocksetBuilder.handleCall(nullptr, DD, - SxBuilder.createVariable(AD.getVarDecl()), - AD.getTriggerStmt()->getEndLoc()); + LocksetBuilder.handleCall( + nullptr, DD, + SxBuilder.translateVariable(AD.getVarDecl(), nullptr), + AD.getTriggerStmt()->getEndLoc()); break; } case CFGElement::CleanupFunction: { const CFGCleanupFunction &CF = BI.castAs<CFGCleanupFunction>(); - LocksetBuilder.handleCall(/*Exp=*/nullptr, CF.getFunctionDecl(), - SxBuilder.createVariable(CF.getVarDecl()), - CF.getVarDecl()->getLocation()); + LocksetBuilder.handleCall( + /*Exp=*/nullptr, CF.getFunctionDecl(), + SxBuilder.translateVariable(CF.getVarDecl(), nullptr), + CF.getVarDecl()->getLocation()); break; } diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp index 68c27ee..25ad673 100644 --- a/clang/lib/Analysis/ThreadSafetyCommon.cpp +++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp @@ -26,6 +26,7 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/Specifiers.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include <algorithm> @@ -239,7 +240,36 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, return CapabilityExpr(E, AttrExp->getType(), Neg); } -til::LiteralPtr *SExprBuilder::createVariable(const VarDecl *VD) { +til::SExpr *SExprBuilder::translateVariable(const VarDecl *VD, + CallingContext *Ctx) { + assert(VD); + + // General recursion guard for x = f(x). If we are already in the process of + // defining VD, use its pre-assignment value to break the cycle. 
+ if (VarsBeingTranslated.contains(VD->getCanonicalDecl())) + return new (Arena) til::LiteralPtr(VD); + VarsBeingTranslated.insert(VD->getCanonicalDecl()); + auto Cleanup = llvm::make_scope_exit( + [&] { VarsBeingTranslated.erase(VD->getCanonicalDecl()); }); + + QualType Ty = VD->getType(); + if (!VD->isStaticLocal() && Ty->isPointerType()) { + // Substitute local variable aliases with a canonical definition. + if (LookupLocalVarExpr) { + // Attempt to resolve an alias through the more complex local variable map + // lookup. This will fail with complex control-flow graphs (where we + // revert to no alias resolution to retain stable variable names). + if (const Expr *E = LookupLocalVarExpr(VD)) { + til::SExpr *Result = translate(E, Ctx); + // Unsupported expression (such as heap allocations) will be undefined; + // rather than failing here, we simply revert to the pointer being the + // canonical variable. + if (Result && !isa<til::Undefined>(Result)) + return Result; + } + } + } + return new (Arena) til::LiteralPtr(VD); } @@ -311,6 +341,8 @@ til::SExpr *SExprBuilder::translate(const Stmt *S, CallingContext *Ctx) { case Stmt::DeclStmtClass: return translateDeclStmt(cast<DeclStmt>(S), Ctx); + case Stmt::StmtExprClass: + return translateStmtExpr(cast<StmtExpr>(S), Ctx); default: break; } @@ -351,6 +383,9 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, : cast<ObjCMethodDecl>(D)->getCanonicalDecl()->getParamDecl(I); } + if (const auto *VarD = dyn_cast<VarDecl>(VD)) + return translateVariable(VarD, Ctx); + // For non-local variables, treat it as a reference to a named object. return new (Arena) til::LiteralPtr(VD); } @@ -689,6 +724,15 @@ SExprBuilder::translateDeclStmt(const DeclStmt *S, CallingContext *Ctx) { return nullptr; } +til::SExpr *SExprBuilder::translateStmtExpr(const StmtExpr *SE, + CallingContext *Ctx) { + // The value of a statement expression is the value of the last statement, + // which must be an expression. 
+ const CompoundStmt *CS = SE->getSubStmt(); + return CS->body_empty() ? new (Arena) til::Undefined(SE) + : translate(CS->body_back(), Ctx); +} + // If (E) is non-trivial, then add it to the current basic block, and // update the statement map so that S refers to E. Returns a new variable // that refers to E. diff --git a/clang/lib/Basic/Attributes.cpp b/clang/lib/Basic/Attributes.cpp index 81b186f..5878a4e 100644 --- a/clang/lib/Basic/Attributes.cpp +++ b/clang/lib/Basic/Attributes.cpp @@ -189,7 +189,12 @@ AttributeCommonInfo::Kind AttributeCommonInfo::getParsedKind(const IdentifierInfo *Name, const IdentifierInfo *ScopeName, Syntax SyntaxUsed) { - return ::getAttrKind(normalizeName(Name, ScopeName, SyntaxUsed), SyntaxUsed); + AttributeCommonInfo::Kind Kind = + ::getAttrKind(normalizeName(Name, ScopeName, SyntaxUsed), SyntaxUsed); + if (SyntaxUsed == AS_HLSLAnnotation && + Kind == AttributeCommonInfo::Kind::UnknownAttribute) + return AttributeCommonInfo::Kind::AT_HLSLUnparsedSemantic; + return Kind; } AttributeCommonInfo::AttrArgsInfo diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index e3f9760..1ae244e 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -683,6 +683,8 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple, return std::make_unique<SPIRV64AMDGCNTargetInfo>(Triple, Opts); return nullptr; } + if (Triple.getVendor() == llvm::Triple::Intel) + return std::make_unique<SPIRV64IntelTargetInfo>(Triple, Opts); return std::make_unique<SPIRV64TargetInfo>(Triple, Opts); } case llvm::Triple::wasm32: diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h index 58bfad1..d8b0e64 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -147,7 +147,7 @@ public: bool checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const override { - if (ISAInfo->hasExtension("zicfiss")) + if (ISAInfo->hasExtension("zimop")) return true; return 
TargetInfo::checkCFProtectionReturnSupported(Diags); } diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 8bb0428..22b2799 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -469,6 +469,17 @@ public: bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); } }; +class LLVM_LIBRARY_VISIBILITY SPIRV64IntelTargetInfo final + : public SPIRV64TargetInfo { +public: + SPIRV64IntelTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + : SPIRV64TargetInfo(Triple, Opts) { + assert(Triple.getVendor() == llvm::Triple::VendorType::Intel && + "64-bit Intel SPIR-V target must use Intel vendor"); + resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-" + "v256:256-v512:512-v1024:1024-n8:16:32:64-G1-P9-A0"); + } +}; } // namespace targets } // namespace clang #endif // LLVM_CLANG_LIB_BASIC_TARGETS_SPIR_H diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index b68e91f..8892e62 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -149,6 +149,57 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, emitVAEnd(emitVAListRef(e->getArg(0)).getPointer()); return {}; + case Builtin::BIalloca: + case Builtin::BI_alloca: + case Builtin::BI__builtin_alloca_uninitialized: + case Builtin::BI__builtin_alloca: { + // Get alloca size input + mlir::Value size = emitScalarExpr(e->getArg(0)); + + // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. + const TargetInfo &ti = getContext().getTargetInfo(); + const CharUnits suitableAlignmentInBytes = + getContext().toCharUnitsFromBits(ti.getSuitableAlign()); + + // Emit the alloca op with type `u8 *` to match the semantics of + // `llvm.alloca`. 
We later bitcast the type to `void *` to match the + // semantics of C/C++ + // FIXME(cir): It may make sense to allow AllocaOp of type `u8` to return a + // pointer of type `void *`. This will require a change to the allocaOp + // verifier. + mlir::Value allocaAddr = builder.createAlloca( + getLoc(e->getSourceRange()), builder.getUInt8PtrTy(), + builder.getUInt8Ty(), "bi_alloca", suitableAlignmentInBytes, size); + + // Initialize the allocated buffer if required. + if (builtinID != Builtin::BI__builtin_alloca_uninitialized) { + // Initialize the alloca with the given size and alignment according to + // the lang opts. Only the trivial non-initialization is supported for + // now. + + switch (getLangOpts().getTrivialAutoVarInit()) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + // Nothing to initialize. + break; + case LangOptions::TrivialAutoVarInitKind::Zero: + case LangOptions::TrivialAutoVarInitKind::Pattern: + cgm.errorNYI("trivial auto var init"); + break; + } + } + + // An alloca will always return a pointer to the alloca (stack) address + // space. This address space need not be the same as the AST / Language + // default (e.g. in C / C++ auto vars are in the generic address space). At + // the AST level this is handled within CreateTempAlloca et al., but for the + // builtin / dynamic alloca we have to handle it here. + assert(!cir::MissingFeatures::addressSpace()); + + // Bitcast the alloca to the expected type. 
+ return RValue::get( + builder.createBitcast(allocaAddr, builder.getVoidPtrTy())); + } + case Builtin::BIfabs: case Builtin::BIfabsf: case Builtin::BIfabsl: @@ -360,6 +411,10 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, case Builtin::BI__builtin_elementwise_acos: return emitUnaryFPBuiltin<cir::ACosOp>(*this, *e); + case Builtin::BI__builtin_elementwise_asin: + return emitUnaryFPBuiltin<cir::ASinOp>(*this, *e); + case Builtin::BI__builtin_elementwise_atan: + return emitUnaryFPBuiltin<cir::ATanOp>(*this, *e); } // If this is an alias for a lib function (e.g. __builtin_sin), emit diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index 7cc024f..66cd673 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -31,6 +31,8 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d, cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: address space"); mlir::Location loc = getLoc(d.getSourceRange()); + bool nrvo = + getContext().getLangOpts().ElideConstructors && d.isNRVOVariable(); CIRGenFunction::AutoVarEmission emission(d); emission.IsEscapingByRef = d.isEscapingByref(); @@ -44,16 +46,37 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d, if (ty->isVariablyModifiedType()) cgm.errorNYI(d.getSourceRange(), "emitAutoVarDecl: variably modified type"); + assert(!cir::MissingFeatures::openMP()); + Address address = Address::invalid(); if (!ty->isConstantSizeType()) cgm.errorNYI(d.getSourceRange(), "emitAutoVarDecl: non-constant size type"); // A normal fixed sized variable becomes an alloca in the entry block, - mlir::Type allocaTy = convertTypeForMem(ty); - // Create the temp alloca and declare variable using it. - address = createTempAlloca(allocaTy, alignment, loc, d.getName(), - /*arraySize=*/nullptr, /*alloca=*/nullptr, ip); - declare(address.getPointer(), &d, ty, getLoc(d.getSourceRange()), alignment); + // unless: + // - it's an NRVO variable. 
+ // - we are compiling OpenMP and it's an OpenMP local variable. + if (nrvo) { + // The named return value optimization: allocate this variable in the + // return slot, so that we can elide the copy when returning this + // variable (C++0x [class.copy]p34). + address = returnValue; + + if (const RecordDecl *rd = ty->getAsRecordDecl()) { + if (const auto *cxxrd = dyn_cast<CXXRecordDecl>(rd); + (cxxrd && !cxxrd->hasTrivialDestructor()) || + rd->isNonTrivialToPrimitiveDestroy()) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: set NRVO flag"); + } + } else { + // A normal fixed sized variable becomes an alloca in the entry block, + mlir::Type allocaTy = convertTypeForMem(ty); + // Create the temp alloca and declare variable using it. + address = createTempAlloca(allocaTy, alignment, loc, d.getName(), + /*arraySize=*/nullptr, /*alloca=*/nullptr, ip); + declare(address.getPointer(), &d, ty, getLoc(d.getSourceRange()), + alignment); + } emission.Addr = address; setAddrOfLocalVar(&d, address); diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index d8c7903..aab7e27 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -1986,8 +1986,16 @@ void CIRGenFunction::emitCXXConstructExpr(const CXXConstructExpr *e, // Elide the constructor if we're constructing from a temporary if (getLangOpts().ElideConstructors && e->isElidable()) { - cgm.errorNYI(e->getSourceRange(), - "emitCXXConstructExpr: elidable constructor"); + // FIXME: This only handles the simplest case, where the source object is + // passed directly as the first argument to the constructor. This + // should also handle stepping through implicit casts and conversion + // sequences which involve two steps, with a conversion operator + // followed by a converting constructor. 
+ const Expr *srcObj = e->getArg(0); + assert(srcObj->isTemporaryObject(getContext(), cd->getParent())); + assert( + getContext().hasSameUnqualifiedType(e->getType(), srcObj->getType())); + emitAggExpr(srcObj, dest); return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index fb782a09..e2181b8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -206,8 +206,10 @@ bool CIRGenFunction::constantFoldsToSimpleInteger(const Expr *cond, void CIRGenFunction::emitAndUpdateRetAlloca(QualType type, mlir::Location loc, CharUnits alignment) { if (!type->isVoidType()) { - fnRetAlloca = emitAlloca("__retval", convertType(type), loc, alignment, - /*insertIntoFnEntryBlock=*/false); + mlir::Value addr = emitAlloca("__retval", convertType(type), loc, alignment, + /*insertIntoFnEntryBlock=*/false); + fnRetAlloca = addr; + returnValue = Address(addr, alignment); } } @@ -655,6 +657,8 @@ void CIRGenFunction::emitDestructorBody(FunctionArgList &args) { // we'd introduce *two* handler blocks. In the Microsoft ABI, we // always delegate because we might not have a definition in this TU. switch (dtorType) { + case Dtor_Unified: + llvm_unreachable("not expecting a unified dtor"); case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT"); case Dtor_Deleting: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index de9e354..42f7f40 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -66,6 +66,10 @@ public: /// The compiler-generated variable that holds the return value. std::optional<mlir::Value> fnRetAlloca; + /// The temporary alloca to hold the return value. This is + /// invalid iff the function has no return value. + Address returnValue = Address::invalid(); + /// Tracks function scope overall cleanup handling. 
EHScopeStack ehStack; @@ -726,6 +730,14 @@ public: const CXXRecordDecl *base, bool baseIsVirtual); + /// Determine whether a return value slot may overlap some other object. + AggValueSlot::Overlap_t getOverlapForReturnValue() { + // FIXME: Assuming no overlap here breaks guaranteed copy elision for base + // class subobjects. These cases may need to be revisited depending on the + // resolution of the relevant core issue. + return AggValueSlot::DoesNotOverlap; + } + /// Determine whether a base class initialization may overlap some other /// object. AggValueSlot::Overlap_t getOverlapForBaseInit(const CXXRecordDecl *rd, diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index 12821c1..f116efc 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -445,8 +445,8 @@ mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) { if (getContext().getLangOpts().ElideConstructors && s.getNRVOCandidate() && s.getNRVOCandidate()->isNRVOVariable()) { - getCIRGenModule().errorNYI(s.getSourceRange(), - "named return value optimization"); + assert(!cir::MissingFeatures::openMP()); + assert(!cir::MissingFeatures::nrvo()); } else if (!rv) { // No return expression. Do nothing. 
} else if (rv->getType()->isVoidType()) { @@ -471,9 +471,16 @@ mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) { builder.CIRBaseBuilderTy::createStore(loc, value, *fnRetAlloca); } break; - default: + case cir::TEK_Complex: getCIRGenModule().errorNYI(s.getSourceRange(), - "non-scalar function return type"); + "complex function return type"); + break; + case cir::TEK_Aggregate: + assert(!cir::MissingFeatures::aggValueSlotGC()); + emitAggExpr(rv, AggValueSlot::forAddr(returnValue, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::IsNotAliased, + getOverlapForReturnValue())); break; } } diff --git a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp index 4e0a041..72bbf08 100644 --- a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp +++ b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp @@ -42,7 +42,8 @@ static void process(mlir::ModuleOp mod, cir::FuncOp func) { if (alloca->getBlock() == &entryBlock) return; // Don't hoist allocas with dynamic alloca size. - assert(!cir::MissingFeatures::opAllocaDynAllocSize()); + if (alloca.getDynAllocSize()) + return; // Hoist allocas into the entry block. 
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index dc2bb5f..d9097b0 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -598,6 +598,14 @@ mlir::LogicalResult CIRToLLVMACosOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMASinOpLowering::matchAndRewrite( + cir::ASinOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Type resTy = typeConverter->convertType(op.getType()); + rewriter.replaceOpWithNewOp<mlir::LLVM::ASinOp>(op, resTy, adaptor.getSrc()); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMAssumeOpLowering::matchAndRewrite( cir::AssumeOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -1095,12 +1103,23 @@ mlir::LogicalResult CIRToLLVMBaseClassAddrOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMATanOpLowering::matchAndRewrite( + cir::ATanOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Type resTy = typeConverter->convertType(op.getType()); + rewriter.replaceOpWithNewOp<mlir::LLVM::ATanOp>(op, resTy, adaptor.getSrc()); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite( cir::AllocaOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { - assert(!cir::MissingFeatures::opAllocaDynAllocSize()); - mlir::Value size = rewriter.create<mlir::LLVM::ConstantOp>( - op.getLoc(), typeConverter->convertType(rewriter.getIndexType()), 1); + mlir::Value size = + op.isDynamic() + ? 
adaptor.getDynAllocSize() + : rewriter.create<mlir::LLVM::ConstantOp>( + op.getLoc(), + typeConverter->convertType(rewriter.getIndexType()), 1); mlir::Type elementTy = convertTypeForMemory(*getTypeConverter(), dataLayout, op.getAllocaType()); mlir::Type resultTy = @@ -2451,11 +2470,13 @@ void ConvertCIRToLLVMPass::runOnOperation() { patterns.add< // clang-format off CIRToLLVMACosOpLowering, + CIRToLLVMASinOpLowering, CIRToLLVMAssumeOpLowering, CIRToLLVMAssumeAlignedOpLowering, CIRToLLVMAssumeSepStorageOpLowering, CIRToLLVMAtomicCmpXchgLowering, CIRToLLVMBaseClassAddrOpLowering, + CIRToLLVMATanOpLowering, CIRToLLVMBinOpLowering, CIRToLLVMBitClrsbOpLowering, CIRToLLVMBitClzOpLowering, diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index cf98baf..dd1dd0a 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -746,6 +746,24 @@ public: mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMASinOpLowering : public mlir::OpConversionPattern<cir::ASinOp> { +public: + using mlir::OpConversionPattern<cir::ASinOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::ASinOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMATanOpLowering : public mlir::OpConversionPattern<cir::ATanOp> { +public: + using mlir::OpConversionPattern<cir::ATanOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::ATanOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + class CIRToLLVMInlineAsmOpLowering : public mlir::OpConversionPattern<cir::InlineAsmOp> { mlir::DataLayout const &dataLayout; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index a94a7ed..0b2fce4 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -752,9 +752,8 @@ const CGFunctionInfo &CodeGenTypes::arrangeBuiltinFunctionDeclaration( 
RequiredArgs::All); } -const CGFunctionInfo & -CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType, - const FunctionArgList &args) { +const CGFunctionInfo &CodeGenTypes::arrangeDeviceKernelCallerDeclaration( + QualType resultType, const FunctionArgList &args) { CanQualTypeList argTypes = getArgTypesForDeclaration(Context, args); return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None, diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index bae55aa..8346ee3 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -1271,10 +1271,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, const CXXRecordDecl *ClassDecl = CD->getParent(); - CXXConstructorDecl::init_const_iterator B = CD->init_begin(), - E = CD->init_end(); - - // Virtual base initializers first, if any. They aren't needed if: + // Virtual base initializers aren't needed if: // - This is a base ctor variant // - There are no vbases // - The class is abstract, so a complete object of it cannot be constructed @@ -1296,15 +1293,36 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, assert(BaseCtorContinueBB); } - for (; B != E && (*B)->isBaseInitializer() && (*B)->isBaseVirtual(); B++) { - if (!ConstructVBases) - continue; - SaveAndRestore ThisRAII(CXXThisValue); - if (CGM.getCodeGenOpts().StrictVTablePointers && - CGM.getCodeGenOpts().OptimizationLevel > 0 && - isInitializerOfDynamicClass(*B)) - CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); - EmitBaseInitializer(*this, ClassDecl, *B); + // Create three separate ranges for the different types of initializers. + auto AllInits = CD->inits(); + + // Find the boundaries between the three groups. 
+ auto VirtualBaseEnd = std::find_if( + AllInits.begin(), AllInits.end(), [](const CXXCtorInitializer *Init) { + return !(Init->isBaseInitializer() && Init->isBaseVirtual()); + }); + + auto NonVirtualBaseEnd = std::find_if(VirtualBaseEnd, AllInits.end(), + [](const CXXCtorInitializer *Init) { + return !Init->isBaseInitializer(); + }); + + // Create the three ranges. + auto VirtualBaseInits = llvm::make_range(AllInits.begin(), VirtualBaseEnd); + auto NonVirtualBaseInits = + llvm::make_range(VirtualBaseEnd, NonVirtualBaseEnd); + auto MemberInits = llvm::make_range(NonVirtualBaseEnd, AllInits.end()); + + // Process virtual base initializers, if necessary. + if (ConstructVBases) { + for (CXXCtorInitializer *Initializer : VirtualBaseInits) { + SaveAndRestore ThisRAII(CXXThisValue); + if (CGM.getCodeGenOpts().StrictVTablePointers && + CGM.getCodeGenOpts().OptimizationLevel > 0 && + isInitializerOfDynamicClass(Initializer)) + CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); + EmitBaseInitializer(*this, ClassDecl, Initializer); + } } if (BaseCtorContinueBB) { @@ -1314,14 +1332,14 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, } // Then, non-virtual base initializers. - for (; B != E && (*B)->isBaseInitializer(); B++) { - assert(!(*B)->isBaseVirtual()); + for (CXXCtorInitializer *Initializer : NonVirtualBaseInits) { + assert(!Initializer->isBaseVirtual()); SaveAndRestore ThisRAII(CXXThisValue); if (CGM.getCodeGenOpts().StrictVTablePointers && CGM.getCodeGenOpts().OptimizationLevel > 0 && - isInitializerOfDynamicClass(*B)) + isInitializerOfDynamicClass(Initializer)) CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); - EmitBaseInitializer(*this, ClassDecl, *B); + EmitBaseInitializer(*this, ClassDecl, Initializer); } InitializeVTablePointers(ClassDecl); @@ -1329,8 +1347,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, // And finally, initialize class members. 
FieldConstructionScope FCS(*this, LoadCXXThisAddress()); ConstructorMemcpyizer CM(*this, CD, Args); - for (; B != E; B++) { - CXXCtorInitializer *Member = (*B); + for (CXXCtorInitializer *Member : MemberInits) { assert(!Member->isBaseInitializer()); assert(Member->isAnyMemberInitializer() && "Delegating initializer on non-delegating constructor"); @@ -1481,6 +1498,8 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // we'd introduce *two* handler blocks. In the Microsoft ABI, we // always delegate because we might not have a definition in this TU. switch (DtorType) { + case Dtor_Unified: + llvm_unreachable("not expecting a unified dtor"); case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT"); case Dtor_Deleting: llvm_unreachable("already handled deleting case"); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 0385dbda..578d09f 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2177,24 +2177,47 @@ static bool isFunctionLocalClass(const CXXRecordDecl *RD) { return false; } +llvm::StringRef +CGDebugInfo::GetMethodLinkageName(const CXXMethodDecl *Method) const { + assert(Method); + + const bool IsCtorOrDtor = + isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method); + + if (IsCtorOrDtor && !CGM.getCodeGenOpts().DebugStructorDeclLinkageNames) + return {}; + + // In some ABIs (particularly Itanium) a single ctor/dtor + // corresponds to multiple functions. Attach a "unified" + // linkage name for those (which is the convention GCC uses). + // Otherwise, attach no linkage name. 
+ if (IsCtorOrDtor && !CGM.getTarget().getCXXABI().hasConstructorVariants()) + return {}; + + if (const auto *Ctor = llvm::dyn_cast<CXXConstructorDecl>(Method)) + return CGM.getMangledName(GlobalDecl(Ctor, CXXCtorType::Ctor_Unified)); + + if (const auto *Dtor = llvm::dyn_cast<CXXDestructorDecl>(Method)) + return CGM.getMangledName(GlobalDecl(Dtor, CXXDtorType::Dtor_Unified)); + + return CGM.getMangledName(Method); +} + llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( const CXXMethodDecl *Method, llvm::DIFile *Unit, llvm::DIType *RecordTy) { - bool IsCtorOrDtor = - isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method); + assert(Method); StringRef MethodName = getFunctionName(Method); llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit); - // Since a single ctor/dtor corresponds to multiple functions, it doesn't - // make sense to give a single ctor/dtor a linkage name. StringRef MethodLinkageName; // FIXME: 'isFunctionLocalClass' seems like an arbitrary/unintentional // property to use here. It may've been intended to model "is non-external // type" but misses cases of non-function-local but non-external classes such // as those in anonymous namespaces as well as the reverse - external types // that are function local, such as those in (non-local) inline functions. - if (!IsCtorOrDtor && !isFunctionLocalClass(Method->getParent())) - MethodLinkageName = CGM.getMangledName(Method); + if (!isFunctionLocalClass(Method->getParent())) + MethodLinkageName = GetMethodLinkageName(Method); // Get the location for the method. 
llvm::DIFile *MethodDefUnit = nullptr; diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index ff9c3cd..f860773 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -899,6 +899,10 @@ private: std::memcpy(Data + A.size(), B.data(), B.size()); return StringRef(Data, A.size() + B.size()); } + + /// If one exists, returns the linkage name of the specified + /// (non-null) \c Method. Returns empty string otherwise. + llvm::StringRef GetMethodLinkageName(const CXXMethodDecl *Method) const; }; /// A scoped helper to set the current debug location to the specified diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 2eff3a3..ce483c5 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -5216,6 +5216,11 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS); } } + // OpenMP: Handle lastprivate(conditional:) in scalar assignment + if (CGF.getLangOpts().OpenMP) { + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, + E->getLHS()); + } // If the result is clearly ignored, return now. 
if (Ignore) diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index d27f378..afee119 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -23,6 +23,7 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" #include "clang/Basic/TargetOptions.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/HLSL/RootSignatureMetadata.h" @@ -69,9 +70,9 @@ void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) { DXILValMD->addOperand(Val); } -void addRootSignature(llvm::dxbc::RootSignatureVersion RootSigVer, - ArrayRef<llvm::hlsl::rootsig::RootElement> Elements, - llvm::Function *Fn, llvm::Module &M) { +void addRootSignatureMD(llvm::dxbc::RootSignatureVersion RootSigVer, + ArrayRef<llvm::hlsl::rootsig::RootElement> Elements, + llvm::Function *Fn, llvm::Module &M) { auto &Ctx = M.getContext(); llvm::hlsl::rootsig::MetadataBuilder RSBuilder(Ctx, Elements); @@ -79,8 +80,8 @@ void addRootSignature(llvm::dxbc::RootSignatureVersion RootSigVer, ConstantAsMetadata *Version = ConstantAsMetadata::get(ConstantInt::get( llvm::Type::getInt32Ty(Ctx), llvm::to_underlying(RootSigVer))); - MDNode *MDVals = - MDNode::get(Ctx, {ValueAsMetadata::get(Fn), RootSignature, Version}); + ValueAsMetadata *EntryFunc = Fn ? 
ValueAsMetadata::get(Fn) : nullptr; + MDNode *MDVals = MDNode::get(Ctx, {EntryFunc, RootSignature, Version}); StringRef RootSignatureValKey = "dx.rootsignatures"; auto *RootSignatureValMD = M.getOrInsertNamedMetadata(RootSignatureValKey); @@ -448,6 +449,19 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) { } } +void CGHLSLRuntime::addRootSignature( + const HLSLRootSignatureDecl *SignatureDecl) { + llvm::Module &M = CGM.getModule(); + Triple T(M.getTargetTriple()); + + // Generated later with the function decl if not targeting root signature + if (T.getEnvironment() != Triple::EnvironmentType::RootSignature) + return; + + addRootSignatureMD(SignatureDecl->getVersion(), + SignatureDecl->getRootElements(), nullptr, M); +} + llvm::TargetExtType * CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) { const auto Entry = LayoutTypes.find(StructType); @@ -552,47 +566,78 @@ static llvm::Value *createSPIRVBuiltinLoad(IRBuilder<> &B, llvm::Module &M, return B.CreateLoad(Ty, GV); } -llvm::Value *CGHLSLRuntime::emitInputSemantic(IRBuilder<> &B, - const ParmVarDecl &D, - llvm::Type *Ty) { - assert(D.hasAttrs() && "Entry parameter missing annotation attribute!"); - if (D.hasAttr<HLSLSV_GroupIndexAttr>()) { +llvm::Value * +CGHLSLRuntime::emitSystemSemanticLoad(IRBuilder<> &B, llvm::Type *Type, + const clang::DeclaratorDecl *Decl, + SemanticInfo &ActiveSemantic) { + if (isa<HLSLSV_GroupIndexAttr>(ActiveSemantic.Semantic)) { llvm::Function *GroupIndex = CGM.getIntrinsic(getFlattenedThreadIdInGroupIntrinsic()); return B.CreateCall(FunctionCallee(GroupIndex)); } - if (D.hasAttr<HLSLSV_DispatchThreadIDAttr>()) { + + if (isa<HLSLSV_DispatchThreadIDAttr>(ActiveSemantic.Semantic)) { llvm::Intrinsic::ID IntrinID = getThreadIdIntrinsic(); llvm::Function *ThreadIDIntrinsic = llvm::Intrinsic::isOverloaded(IntrinID) ? 
CGM.getIntrinsic(IntrinID, {CGM.Int32Ty}) : CGM.getIntrinsic(IntrinID); - return buildVectorInput(B, ThreadIDIntrinsic, Ty); + return buildVectorInput(B, ThreadIDIntrinsic, Type); } - if (D.hasAttr<HLSLSV_GroupThreadIDAttr>()) { + + if (isa<HLSLSV_GroupThreadIDAttr>(ActiveSemantic.Semantic)) { llvm::Intrinsic::ID IntrinID = getGroupThreadIdIntrinsic(); llvm::Function *GroupThreadIDIntrinsic = llvm::Intrinsic::isOverloaded(IntrinID) ? CGM.getIntrinsic(IntrinID, {CGM.Int32Ty}) : CGM.getIntrinsic(IntrinID); - return buildVectorInput(B, GroupThreadIDIntrinsic, Ty); + return buildVectorInput(B, GroupThreadIDIntrinsic, Type); } - if (D.hasAttr<HLSLSV_GroupIDAttr>()) { + + if (isa<HLSLSV_GroupIDAttr>(ActiveSemantic.Semantic)) { llvm::Intrinsic::ID IntrinID = getGroupIdIntrinsic(); llvm::Function *GroupIDIntrinsic = llvm::Intrinsic::isOverloaded(IntrinID) ? CGM.getIntrinsic(IntrinID, {CGM.Int32Ty}) : CGM.getIntrinsic(IntrinID); - return buildVectorInput(B, GroupIDIntrinsic, Ty); + return buildVectorInput(B, GroupIDIntrinsic, Type); } - if (D.hasAttr<HLSLSV_PositionAttr>()) { - if (getArch() == llvm::Triple::spirv) - return createSPIRVBuiltinLoad(B, CGM.getModule(), Ty, "sv_position", - /* BuiltIn::Position */ 0); - llvm_unreachable("SV_Position semantic not implemented for this target."); + + if (HLSLSV_PositionAttr *S = + dyn_cast<HLSLSV_PositionAttr>(ActiveSemantic.Semantic)) { + if (CGM.getTriple().getEnvironment() == Triple::EnvironmentType::Pixel) + return createSPIRVBuiltinLoad(B, CGM.getModule(), Type, + S->getAttrName()->getName(), + /* BuiltIn::FragCoord */ 15); } - assert(false && "Unhandled parameter attribute"); - return nullptr; + + llvm_unreachable("non-handled system semantic. 
FIXME."); +} + +llvm::Value * +CGHLSLRuntime::handleScalarSemanticLoad(IRBuilder<> &B, llvm::Type *Type, + const clang::DeclaratorDecl *Decl, + SemanticInfo &ActiveSemantic) { + + if (!ActiveSemantic.Semantic) { + ActiveSemantic.Semantic = Decl->getAttr<HLSLSemanticAttr>(); + if (!ActiveSemantic.Semantic) { + CGM.getDiags().Report(Decl->getInnerLocStart(), + diag::err_hlsl_semantic_missing); + return nullptr; + } + ActiveSemantic.Index = ActiveSemantic.Semantic->getSemanticIndex(); + } + + return emitSystemSemanticLoad(B, Type, Decl, ActiveSemantic); +} + +llvm::Value * +CGHLSLRuntime::handleSemanticLoad(IRBuilder<> &B, llvm::Type *Type, + const clang::DeclaratorDecl *Decl, + SemanticInfo &ActiveSemantic) { + assert(!Type->isStructTy()); + return handleScalarSemanticLoad(B, Type, Decl, ActiveSemantic); } void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, @@ -637,8 +682,10 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, Args.emplace_back(PoisonValue::get(Param.getType())); continue; } + const ParmVarDecl *PD = FD->getParamDecl(Param.getArgNo() - SRetOffset); - Args.push_back(emitInputSemantic(B, *PD, Param.getType())); + SemanticInfo ActiveSemantic = {nullptr, 0}; + Args.push_back(handleSemanticLoad(B, Param.getType(), PD, ActiveSemantic)); } CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args, OB); @@ -651,8 +698,8 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, for (const Attr *Attr : FD->getAttrs()) { if (const auto *RSAttr = dyn_cast<RootSignatureAttr>(Attr)) { auto *RSDecl = RSAttr->getSignatureDecl(); - addRootSignature(RSDecl->getVersion(), RSDecl->getRootElements(), EntryFn, - M); + addRootSignatureMD(RSDecl->getVersion(), RSDecl->getRootElements(), + EntryFn, M); } } } @@ -829,11 +876,27 @@ llvm::Instruction *CGHLSLRuntime::getConvergenceToken(BasicBlock &BB) { class OpaqueValueVisitor : public RecursiveASTVisitor<OpaqueValueVisitor> { public: - llvm::SmallPtrSet<OpaqueValueExpr *, 8> OVEs; + 
llvm::SmallVector<OpaqueValueExpr *, 8> OVEs; + llvm::SmallPtrSet<OpaqueValueExpr *, 8> Visited; OpaqueValueVisitor() {} + bool VisitHLSLOutArgExpr(HLSLOutArgExpr *) { + // These need to be bound in CodeGenFunction::EmitHLSLOutArgLValues + // or CodeGenFunction::EmitHLSLOutArgExpr. If they are part of this + // traversal, the temporary containing the copy out will not have + // been created yet. + return false; + } + bool VisitOpaqueValueExpr(OpaqueValueExpr *E) { - OVEs.insert(E); + // Traverse the source expression first. + if (E->getSourceExpr()) + TraverseStmt(E->getSourceExpr()); + + // Then add this OVE if we haven't seen it before. + if (Visited.insert(E).second) + OVEs.push_back(E); + return true; } }; diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 0582be3..370f3d5 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -21,6 +21,8 @@ #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/IntrinsicsSPIRV.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/HLSLRuntime.h" @@ -62,6 +64,7 @@ class VarDecl; class ParmVarDecl; class InitListExpr; class HLSLBufferDecl; +class HLSLRootSignatureDecl; class HLSLVkBindingAttr; class HLSLResourceBindingAttr; class Type; @@ -137,8 +140,26 @@ public: protected: CodeGenModule &CGM; - llvm::Value *emitInputSemantic(llvm::IRBuilder<> &B, const ParmVarDecl &D, - llvm::Type *Ty); + void collectInputSemantic(llvm::IRBuilder<> &B, const DeclaratorDecl *D, + llvm::Type *Type, + SmallVectorImpl<llvm::Value *> &Inputs); + + struct SemanticInfo { + clang::HLSLSemanticAttr *Semantic; + uint32_t Index; + }; + + llvm::Value *emitSystemSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type, + const clang::DeclaratorDecl *Decl, + SemanticInfo &ActiveSemantic); + + llvm::Value *handleScalarSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type, + const clang::DeclaratorDecl *Decl, + SemanticInfo 
&ActiveSemantic); + + llvm::Value *handleSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type, + const clang::DeclaratorDecl *Decl, + SemanticInfo &ActiveSemantic); public: CGHLSLRuntime(CodeGenModule &CGM) : CGM(CGM) {} @@ -151,6 +172,7 @@ public: void generateGlobalCtorDtorCalls(); void addBuffer(const HLSLBufferDecl *D); + void addRootSignature(const HLSLRootSignatureDecl *D); void finishCodeGen(); void setHLSLEntryAttributes(const FunctionDecl *FD, llvm::Function *Fn); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index b38eb54..e80aa15 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1038,7 +1038,8 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false, hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false); OMPBuilder.initialize(); - OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice + OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(), + CGM.getLangOpts().OpenMPIsTargetDevice ? 
CGM.getLangOpts().OMPHostIRFile : StringRef{}); OMPBuilder.setConfig(Config); @@ -1238,7 +1239,7 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); + return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D); } std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const { @@ -6227,7 +6228,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); + return CGF.GenerateOpenMPCapturedStmtFunction(CS, D); }; cantFail(OMPBuilder.emitTargetRegionFunction( diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index a80d9fd..8a402fc 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1210,9 +1210,9 @@ void CGOpenMPRuntimeGPU::emitParallelCall( if (!CGF.HaveInsertPoint()) return; - auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, NumThreads, - NumThreadsModifier, Severity, Message]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, + NumThreads](CodeGenFunction &CGF, + PrePostActionTy &Action) { CGBuilderTy &Bld = CGF.Builder; llvm::Value *NumThreadsVal = NumThreads; llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn]; @@ -1260,22 +1260,21 @@ void CGOpenMPRuntimeGPU::emitParallelCall( NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty); assert(IfCondVal && "Expected a value"); - RuntimeFunction FnID = OMPRTL___kmpc_parallel_51; llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); - llvm::SmallVector<llvm::Value *, 10> Args( - 
{RTLoc, getThreadID(CGF, Loc), IfCondVal, NumThreadsVal, - llvm::ConstantInt::get(CGF.Int32Ty, -1), FnPtr, ID, - Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF), - CGF.VoidPtrPtrTy), - llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}); - if (NumThreadsModifier == OMPC_NUMTHREADS_strict) { - FnID = OMPRTL___kmpc_parallel_60; - Args.append({llvm::ConstantInt::get(CGM.Int32Ty, true), - emitSeverityClause(Severity), - emitMessageClause(CGF, Message)}); - } - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args); + llvm::Value *Args[] = { + RTLoc, + getThreadID(CGF, Loc), + IfCondVal, + NumThreadsVal, + llvm::ConstantInt::get(CGF.Int32Ty, -1), + FnPtr, + ID, + Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF), + CGF.VoidPtrPtrTy), + llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_parallel_51), + Args); }; RegionCodeGenTy RCG(ParallelGen); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index 3e36708..665221b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -165,11 +165,6 @@ public: /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' /// clause. - /// If the modifier 'strict' is given: - /// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 - /// global_tid, kmp_int32 num_threads, int severity, const char *message) to - /// generate code for 'num_threads' clause with 'strict' modifier. - /// \param NumThreads An integer value of threads. 
void emitNumThreadsClause( CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown, @@ -238,11 +233,11 @@ public: /// \param NumThreads The value corresponding to the num_threads clause, if /// any, or nullptr. /// \param NumThreadsModifier The modifier of the num_threads clause, if - /// any, ignored otherwise. + /// any, ignored otherwise. Currently unused on the device. /// \param Severity The severity corresponding to the num_threads clause, if - /// any, ignored otherwise. + /// any, ignored otherwise. Currently unused on the device. /// \param Message The message string corresponding to the num_threads clause, - /// if any, or nullptr. + /// if any, or nullptr. Currently unused on the device. void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 1360680..d72cd8f 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -471,12 +471,13 @@ struct FunctionOptions { const StringRef FunctionName; /// Location of the non-debug version of the outlined function. SourceLocation Loc; + const bool IsDeviceKernel = false; explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, bool RegisterCastedArgsOnly, StringRef FunctionName, - SourceLocation Loc) + SourceLocation Loc, bool IsDeviceKernel) : S(S), UIntPtrCastRequired(UIntPtrCastRequired), RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), - FunctionName(FunctionName), Loc(Loc) {} + FunctionName(FunctionName), Loc(Loc), IsDeviceKernel(IsDeviceKernel) {} }; } // namespace @@ -570,7 +571,11 @@ static llvm::Function *emitOutlinedFunctionPrologue( // Create the function declaration. const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); + FO.IsDeviceKernel + ? 
CGM.getTypes().arrangeDeviceKernelCallerDeclaration(Ctx.VoidTy, + TargetArgs) + : CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, + TargetArgs); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); auto *F = @@ -664,9 +669,9 @@ static llvm::Function *emitOutlinedFunctionPrologue( return F; } -llvm::Function * -CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, - SourceLocation Loc) { +llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunction( + const CapturedStmt &S, const OMPExecutableDirective &D) { + SourceLocation Loc = D.getBeginLoc(); assert( CapturedStmtInfo && "CapturedStmtInfo should be set when generating the captured function"); @@ -682,7 +687,10 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << CapturedStmtInfo->getHelperName(); - + OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D); + bool IsDeviceKernel = CGM.getOpenMPRuntime().isGPU() && + isOpenMPTargetExecutionDirective(EKind) && + D.getCapturedStmt(OMPD_target) == &S; CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); llvm::Function *WrapperF = nullptr; if (NeedWrapperFunction) { @@ -690,7 +698,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, // OpenMPI-IR-Builder. 
FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, /*RegisterCastedArgsOnly=*/true, - CapturedStmtInfo->getHelperName(), Loc); + CapturedStmtInfo->getHelperName(), Loc, + IsDeviceKernel); WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; WrapperF = emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, @@ -698,7 +707,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, Out << "_debug__"; } FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, - Out.str(), Loc); + Out.str(), Loc, !NeedWrapperFunction && IsDeviceKernel); llvm::Function *F = emitOutlinedFunctionPrologue( *this, WrapperArgs, WrapperLocalAddrs, WrapperVLASizes, CXXThisValue, FO); CodeGenFunction::OMPPrivateScope LocalScope(*this); @@ -6119,13 +6128,13 @@ void CodeGenFunction::EmitOMPDistributeDirective( emitOMPDistributeDirective(S, *this, CGM); } -static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, - const CapturedStmt *S, - SourceLocation Loc) { +static llvm::Function * +emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S, + const OMPExecutableDirective &D) { CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; CGF.CapturedStmtInfo = &CapStmtInfo; - llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); + llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, D); Fn->setDoesNotRecurse(); return Fn; } @@ -6190,8 +6199,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { Builder, /*CreateBranch=*/false, ".ordered.after"); llvm::SmallVector<llvm::Value *, 16> CapturedVars; GenerateOpenMPCapturedVars(*CS, CapturedVars); - llvm::Function *OutlinedFn = - emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); + llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS, S); assert(S.getBeginLoc().isValid() && "Outlined function call location must be valid."); 
ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc()); @@ -6233,8 +6241,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { if (C) { llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - llvm::Function *OutlinedFn = - emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); + llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS, S); CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), OutlinedFn, CapturedVars); } else { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 123cb4f..727487b 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3694,8 +3694,9 @@ public: llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); Address GenerateCapturedStmtArgument(const CapturedStmt &S); - llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, - SourceLocation Loc); + llvm::Function * + GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, + const OMPExecutableDirective &D); void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars); void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 87d2cd4..a16dfb5 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -582,8 +582,7 @@ void CodeGenModule::createOpenCLRuntime() { } void CodeGenModule::createOpenMPRuntime() { - if (!LangOpts.OMPHostIRFile.empty() && - !llvm::sys::fs::exists(LangOpts.OMPHostIRFile)) + if (!LangOpts.OMPHostIRFile.empty() && !FS->exists(LangOpts.OMPHostIRFile)) Diags.Report(diag::err_omp_host_ir_file_not_found) << LangOpts.OMPHostIRFile; @@ -7545,7 +7544,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { 
break; case Decl::HLSLRootSignature: - // Will be handled by attached function + getHLSLRuntime().addRootSignature(cast<HLSLRootSignatureDecl>(D)); break; case Decl::HLSLBuffer: getHLSLRuntime().addBuffer(cast<HLSLBufferDecl>(D)); diff --git a/clang/lib/CodeGen/CodeGenSYCL.cpp b/clang/lib/CodeGen/CodeGenSYCL.cpp index b9a96fe..7d66d96 100644 --- a/clang/lib/CodeGen/CodeGenSYCL.cpp +++ b/clang/lib/CodeGen/CodeGenSYCL.cpp @@ -49,7 +49,7 @@ void CodeGenModule::EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn, // Compute the function info and LLVM function type. const CGFunctionInfo &FnInfo = - getTypes().arrangeSYCLKernelCallerDeclaration(Ctx.VoidTy, Args); + getTypes().arrangeDeviceKernelCallerDeclaration(Ctx.VoidTy, Args); llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo); // Retrieve the generated name for the SYCL kernel caller function. diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 29f6f1e..9de7e0a 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -229,12 +229,12 @@ public: const CGFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args); - /// A SYCL kernel caller function is an offload device entry point function + /// A device kernel caller function is an offload device entry point function /// with a target device dependent calling convention such as amdgpu_kernel, /// ptx_kernel, or spir_kernel. const CGFunctionInfo & - arrangeSYCLKernelCallerDeclaration(QualType resultType, - const FunctionArgList &args); + arrangeDeviceKernelCallerDeclaration(QualType resultType, + const FunctionArgList &args); /// Objective-C methods are C functions with some implicit parameters. 
const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index d5b5fd7..7dc2eaf 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -91,6 +91,8 @@ public: case Dtor_Comdat: llvm_unreachable("emitting dtor comdat as function?"); + case Dtor_Unified: + llvm_unreachable("emitting unified dtor as function?"); } llvm_unreachable("bad dtor kind"); } @@ -108,6 +110,9 @@ public: case Ctor_Comdat: llvm_unreachable("emitting ctor comdat as function?"); + + case Ctor_Unified: + llvm_unreachable("emitting unified ctor as function?"); } llvm_unreachable("bad dtor kind"); } diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 88f0648..94190a1 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -77,6 +77,8 @@ public: return false; case Dtor_Comdat: llvm_unreachable("emitting dtor comdat as function?"); + case Dtor_Unified: + llvm_unreachable("unexpected unified dtor type"); } llvm_unreachable("bad dtor kind"); } @@ -1417,6 +1419,8 @@ llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage( // and are emitted everywhere they are used. They are internal if the class // is internal. 
return llvm::GlobalValue::LinkOnceODRLinkage; + case Dtor_Unified: + llvm_unreachable("MS C++ ABI does not support unified dtors"); case Dtor_Comdat: llvm_unreachable("MS C++ ABI does not support comdat dtors"); } diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 87a4628..07cf08c 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -295,11 +295,69 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs)); } +static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) { + switch (BuiltinID) { + default: + llvm_unreachable("Unknown BuiltinID for wave reduction"); + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64: + return Intrinsic::amdgcn_wave_reduce_add; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64: + return Intrinsic::amdgcn_wave_reduce_sub; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64: + return Intrinsic::amdgcn_wave_reduce_min; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64: + return Intrinsic::amdgcn_wave_reduce_umin; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64: + return Intrinsic::amdgcn_wave_reduce_max; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64: + return Intrinsic::amdgcn_wave_reduce_umax; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64: + return Intrinsic::amdgcn_wave_reduce_and; + case 
clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64: + return Intrinsic::amdgcn_wave_reduce_or; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: + return Intrinsic::amdgcn_wave_reduce_xor; + } +} + Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: { + Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID); + llvm::Value *Value = EmitScalarExpr(E->getArg(0)); + llvm::Value *Strategy = EmitScalarExpr(E->getArg(1)); + llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()}); + return Builder.CreateCall(F, {Value, Strategy}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git 
a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 5380624..2e3fc53 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -132,13 +132,14 @@ ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const { } ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { - if (getContext().getLangOpts().CUDAIsDevice) { + if (getContext().getLangOpts().isTargetDevice()) { // Coerce pointer arguments with default address space to CrossWorkGroup - // pointers for HIPSPV/CUDASPV. When the language mode is HIP/CUDA, the - // SPIRTargetInfo maps cuda_device to SPIR-V's CrossWorkGroup address space. + // pointers for target devices as default address space kernel arguments + // are not allowed. We use the opencl_global language address space which + // always maps to CrossWorkGroup. llvm::Type *LTy = CGT.ConvertType(Ty); auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default); - auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device); + auto GlobalAS = getContext().getTargetAddressSpace(LangAS::opencl_global); auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy); if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) { LTy = llvm::PointerType::get(PtrTy->getContext(), GlobalAS); diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 7ee8a0d..c03ba94 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -1513,12 +1513,18 @@ static void initFeatureMaps(const ASTContext &Ctx, static bool checkAVXParamFeature(DiagnosticsEngine &Diag, SourceLocation CallLoc, + const FunctionDecl &Callee, const llvm::StringMap<bool> &CallerMap, const llvm::StringMap<bool> &CalleeMap, QualType Ty, StringRef Feature, bool IsArgument) { bool CallerHasFeat = CallerMap.lookup(Feature); bool CalleeHasFeat = CalleeMap.lookup(Feature); + // No explicit features and the function is internal, be permissive. 
+ if (!CallerHasFeat && !CalleeHasFeat && + (!Callee.isExternallyVisible() || Callee.hasAttr<AlwaysInlineAttr>())) + return false; + if (!CallerHasFeat && !CalleeHasFeat) return Diag.Report(CallLoc, diag::warn_avx_calling_convention) << IsArgument << Ty << Feature; @@ -1534,18 +1540,18 @@ static bool checkAVXParamFeature(DiagnosticsEngine &Diag, } static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx, - SourceLocation CallLoc, + SourceLocation CallLoc, const FunctionDecl &Callee, const llvm::StringMap<bool> &CallerMap, const llvm::StringMap<bool> &CalleeMap, QualType Ty, bool IsArgument) { uint64_t Size = Ctx.getTypeSize(Ty); if (Size > 256) - return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, + return checkAVXParamFeature(Diag, CallLoc, Callee, CallerMap, CalleeMap, Ty, "avx512f", IsArgument); if (Size > 128) - return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx", - IsArgument); + return checkAVXParamFeature(Diag, CallLoc, Callee, CallerMap, CalleeMap, Ty, + "avx", IsArgument); return false; } @@ -1582,8 +1588,8 @@ void X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM, if (ArgIndex < Callee->getNumParams()) Ty = Callee->getParamDecl(ArgIndex)->getType(); - if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, - CalleeMap, Ty, /*IsArgument*/ true)) + if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, *Callee, + CallerMap, CalleeMap, Ty, /*IsArgument*/ true)) return; } ++ArgIndex; @@ -1594,7 +1600,7 @@ void X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM, if (Callee->getReturnType()->isVectorType() && CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) { initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); - checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, + checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, *Callee, CallerMap, CalleeMap, Callee->getReturnType(), /*IsArgument*/ false); } diff --git 
a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 49c89ab..a9041d2 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -519,7 +519,9 @@ ToolChain::getTargetAndModeFromProgramName(StringRef PN) { StringRef Prefix(ProgName); Prefix = Prefix.slice(0, LastComponent); std::string IgnoredError; - bool IsRegistered = llvm::TargetRegistry::lookupTarget(Prefix, IgnoredError); + + llvm::Triple Triple(Prefix); + bool IsRegistered = llvm::TargetRegistry::lookupTarget(Triple, IgnoredError); return ParsedClangName{std::string(Prefix), ModeSuffix, DS->ModeFlag, IsRegistered}; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 21e45c6..946b1e3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2730,17 +2730,40 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, } } -static std::string ComplexArithmeticStr(LangOptions::ComplexRangeKind Range) { - return (Range == LangOptions::ComplexRangeKind::CX_None) - ? "" - : "-fcomplex-arithmetic=" + complexRangeKindToStr(Range); -} +static void EmitComplexRangeDiag(const Driver &D, StringRef LastOpt, + LangOptions::ComplexRangeKind Range, + StringRef NewOpt, + LangOptions::ComplexRangeKind NewRange) { + // Do not emit a warning if NewOpt overrides LastOpt in the following cases. 
+ // + // | LastOpt | NewOpt | + // |-----------------------|-----------------------| + // | -fcx-limited-range | -fno-cx-limited-range | + // | -fno-cx-limited-range | -fcx-limited-range | + // | -fcx-fortran-rules | -fno-cx-fortran-rules | + // | -fno-cx-fortran-rules | -fcx-fortran-rules | + // | -ffast-math | -fno-fast-math | + // | -ffp-model= | -ffast-math | + // | -ffp-model= | -fno-fast-math | + // | -ffp-model= | -ffp-model= | + // | -fcomplex-arithmetic= | -fcomplex-arithmetic= | + if (LastOpt == NewOpt || NewOpt.empty() || LastOpt.empty() || + (LastOpt == "-fcx-limited-range" && NewOpt == "-fno-cx-limited-range") || + (LastOpt == "-fno-cx-limited-range" && NewOpt == "-fcx-limited-range") || + (LastOpt == "-fcx-fortran-rules" && NewOpt == "-fno-cx-fortran-rules") || + (LastOpt == "-fno-cx-fortran-rules" && NewOpt == "-fcx-fortran-rules") || + (LastOpt == "-ffast-math" && NewOpt == "-fno-fast-math") || + (LastOpt.starts_with("-ffp-model=") && NewOpt == "-ffast-math") || + (LastOpt.starts_with("-ffp-model=") && NewOpt == "-fno-fast-math") || + (LastOpt.starts_with("-ffp-model=") && + NewOpt.starts_with("-ffp-model=")) || + (LastOpt.starts_with("-fcomplex-arithmetic=") && + NewOpt.starts_with("-fcomplex-arithmetic="))) + return; -static void EmitComplexRangeDiag(const Driver &D, std::string str1, - std::string str2) { - if (str1 != str2 && !str2.empty() && !str1.empty()) { - D.Diag(clang::diag::warn_drv_overriding_option) << str1 << str2; - } + D.Diag(clang::diag::warn_drv_overriding_complex_range) + << LastOpt << NewOpt << complexRangeKindToStr(Range) + << complexRangeKindToStr(NewRange); } static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, @@ -2797,31 +2820,29 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, StringRef BFloat16ExcessPrecision = ""; LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None; std::string ComplexRangeStr; - std::string GccRangeComplexOption; - 
std::string LastComplexRangeOption; - - auto setComplexRange = [&](LangOptions::ComplexRangeKind NewRange) { - // Warn if user expects to perform full implementation of complex - // multiplication or division in the presence of nnan or ninf flags. - if (Range != NewRange) - EmitComplexRangeDiag(D, - !GccRangeComplexOption.empty() - ? GccRangeComplexOption - : ComplexArithmeticStr(Range), - ComplexArithmeticStr(NewRange)); + StringRef LastComplexRangeOption; + + auto setComplexRange = [&](StringRef NewOption, + LangOptions::ComplexRangeKind NewRange) { + // Warn if user overrides the previously set complex number + // multiplication/division option. + if (Range != LangOptions::ComplexRangeKind::CX_None && Range != NewRange) + EmitComplexRangeDiag(D, LastComplexRangeOption, Range, NewOption, + NewRange); + LastComplexRangeOption = NewOption; Range = NewRange; }; // Lambda to set fast-math options. This is also used by -ffp-model=fast - auto applyFastMath = [&](bool Aggressive) { + auto applyFastMath = [&](bool Aggressive, StringRef CallerOption) { if (Aggressive) { HonorINFs = false; HonorNaNs = false; - setComplexRange(LangOptions::ComplexRangeKind::CX_Basic); + setComplexRange(CallerOption, LangOptions::ComplexRangeKind::CX_Basic); } else { HonorINFs = true; HonorNaNs = true; - setComplexRange(LangOptions::ComplexRangeKind::CX_Promoted); + setComplexRange(CallerOption, LangOptions::ComplexRangeKind::CX_Promoted); } MathErrno = false; AssociativeMath = true; @@ -2873,54 +2894,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, default: continue; case options::OPT_fcx_limited_range: - if (GccRangeComplexOption.empty()) { - if (Range != LangOptions::ComplexRangeKind::CX_Basic) - EmitComplexRangeDiag(D, renderComplexRangeOption(Range), - "-fcx-limited-range"); - } else { - if (GccRangeComplexOption != "-fno-cx-limited-range") - EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-limited-range"); - } - GccRangeComplexOption = 
"-fcx-limited-range"; - LastComplexRangeOption = A->getSpelling(); - Range = LangOptions::ComplexRangeKind::CX_Basic; + setComplexRange(A->getSpelling(), + LangOptions::ComplexRangeKind::CX_Basic); break; case options::OPT_fno_cx_limited_range: - if (GccRangeComplexOption.empty()) { - EmitComplexRangeDiag(D, renderComplexRangeOption(Range), - "-fno-cx-limited-range"); - } else { - if (GccRangeComplexOption != "-fcx-limited-range" && - GccRangeComplexOption != "-fno-cx-fortran-rules") - EmitComplexRangeDiag(D, GccRangeComplexOption, - "-fno-cx-limited-range"); - } - GccRangeComplexOption = "-fno-cx-limited-range"; - LastComplexRangeOption = A->getSpelling(); - Range = LangOptions::ComplexRangeKind::CX_Full; + setComplexRange(A->getSpelling(), LangOptions::ComplexRangeKind::CX_Full); break; case options::OPT_fcx_fortran_rules: - if (GccRangeComplexOption.empty()) - EmitComplexRangeDiag(D, renderComplexRangeOption(Range), - "-fcx-fortran-rules"); - else - EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-fortran-rules"); - GccRangeComplexOption = "-fcx-fortran-rules"; - LastComplexRangeOption = A->getSpelling(); - Range = LangOptions::ComplexRangeKind::CX_Improved; + setComplexRange(A->getSpelling(), + LangOptions::ComplexRangeKind::CX_Improved); break; case options::OPT_fno_cx_fortran_rules: - if (GccRangeComplexOption.empty()) { - EmitComplexRangeDiag(D, renderComplexRangeOption(Range), - "-fno-cx-fortran-rules"); - } else { - if (GccRangeComplexOption != "-fno-cx-limited-range") - EmitComplexRangeDiag(D, GccRangeComplexOption, - "-fno-cx-fortran-rules"); - } - GccRangeComplexOption = "-fno-cx-fortran-rules"; - LastComplexRangeOption = A->getSpelling(); - Range = LangOptions::ComplexRangeKind::CX_Full; + setComplexRange(A->getSpelling(), LangOptions::ComplexRangeKind::CX_Full); break; case options::OPT_fcomplex_arithmetic_EQ: { LangOptions::ComplexRangeKind RangeVal; @@ -2938,25 +2923,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver 
&D, << A->getSpelling() << Val; break; } - if (!GccRangeComplexOption.empty()) { - if (GccRangeComplexOption != "-fcx-limited-range") { - if (GccRangeComplexOption != "-fcx-fortran-rules") { - if (RangeVal != LangOptions::ComplexRangeKind::CX_Improved) - EmitComplexRangeDiag(D, GccRangeComplexOption, - ComplexArithmeticStr(RangeVal)); - } else { - EmitComplexRangeDiag(D, GccRangeComplexOption, - ComplexArithmeticStr(RangeVal)); - } - } else { - if (RangeVal != LangOptions::ComplexRangeKind::CX_Basic) - EmitComplexRangeDiag(D, GccRangeComplexOption, - ComplexArithmeticStr(RangeVal)); - } - } - LastComplexRangeOption = - Args.MakeArgString(A->getSpelling() + A->getValue()); - Range = RangeVal; + setComplexRange(Args.MakeArgString(A->getSpelling() + Val), RangeVal); break; } case options::OPT_ffp_model_EQ: { @@ -2984,19 +2951,20 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, << Args.MakeArgString("-ffp-model=" + Val); if (Val == "fast") { FPModel = Val; - applyFastMath(false); + applyFastMath(false, Args.MakeArgString(A->getSpelling() + Val)); // applyFastMath sets fp-contract="fast" LastFpContractOverrideOption = "-ffp-model=fast"; } else if (Val == "aggressive") { FPModel = Val; - applyFastMath(true); + applyFastMath(true, Args.MakeArgString(A->getSpelling() + Val)); // applyFastMath sets fp-contract="fast" LastFpContractOverrideOption = "-ffp-model=aggressive"; } else if (Val == "precise") { FPModel = Val; FPContract = "on"; LastFpContractOverrideOption = "-ffp-model=precise"; - setComplexRange(LangOptions::ComplexRangeKind::CX_Full); + setComplexRange(Args.MakeArgString(A->getSpelling() + Val), + LangOptions::ComplexRangeKind::CX_Full); } else if (Val == "strict") { StrictFPModel = true; FPExceptionBehavior = "strict"; @@ -3005,11 +2973,11 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, LastFpContractOverrideOption = "-ffp-model=strict"; TrappingMath = true; RoundingFPMath = true; - 
setComplexRange(LangOptions::ComplexRangeKind::CX_Full); + setComplexRange(Args.MakeArgString(A->getSpelling() + Val), + LangOptions::ComplexRangeKind::CX_Full); } else D.Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Val; - LastComplexRangeOption = A->getSpelling(); break; } @@ -3194,8 +3162,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, continue; [[fallthrough]]; case options::OPT_ffast_math: - applyFastMath(true); - LastComplexRangeOption = A->getSpelling(); + applyFastMath(true, A->getSpelling()); if (A->getOption().getID() == options::OPT_Ofast) LastFpContractOverrideOption = "-Ofast"; else @@ -3213,15 +3180,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, ApproxFunc = false; SignedZeros = true; restoreFPContractState(); - // If the last specified option related to complex range is not - // -ffast-math or -ffp-model=, emit warning. - if (LastComplexRangeOption != "-ffast-math" && - LastComplexRangeOption != "-ffp-model=" && - Range != LangOptions::ComplexRangeKind::CX_Full) - EmitComplexRangeDiag(D, LastComplexRangeOption, "-fno-fast-math"); - Range = LangOptions::ComplexRangeKind::CX_None; + if (Range != LangOptions::ComplexRangeKind::CX_Full) + setComplexRange(A->getSpelling(), + LangOptions::ComplexRangeKind::CX_None); + else + Range = LangOptions::ComplexRangeKind::CX_None; LastComplexRangeOption = ""; - GccRangeComplexOption = ""; LastFpContractOverrideOption = ""; break; } // End switch (A->getOption().getID()) @@ -4608,6 +4572,10 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T, KeyInstructionsOnByDefault)) CmdArgs.push_back("-gkey-instructions"); + if (!Args.hasFlag(options::OPT_gstructor_decl_linkage_names, + options::OPT_gno_structor_decl_linkage_names, true)) + CmdArgs.push_back("-gno-structor-decl-linkage-names"); + if (EmitCodeView) { CmdArgs.push_back("-gcodeview"); @@ -5449,13 +5417,14 @@ void 
Clang::ConstructJob(Compilation &C, const JobAction &JA, const bool IsAssertBuild = true; #endif - // Disable the verification pass in asserts builds unless otherwise specified. + // Disable the verification pass in no-asserts builds unless otherwise + // specified. if (Args.hasFlag(options::OPT_fno_verify_intermediate_code, options::OPT_fverify_intermediate_code, !IsAssertBuild)) { CmdArgs.push_back("-disable-llvm-verifier"); } - // Discard value names in assert builds unless otherwise specified. + // Discard value names in no-asserts builds unless otherwise specified. if (Args.hasFlag(options::OPT_fdiscard_value_names, options::OPT_fno_discard_value_names, !IsAssertBuild)) { if (Args.hasArg(options::OPT_fdiscard_value_names) && @@ -6756,7 +6725,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (!Triple.isAArch64() && !Triple.isLoongArch() && !Triple.isRISCV() && !Triple.isX86() && !(!Triple.isOSAIX() && (Triple.getArch() == llvm::Triple::ppc || - Triple.getArch() == llvm::Triple::ppc64))) + Triple.getArch() == llvm::Triple::ppc64 || + Triple.getArch() == llvm::Triple::ppc64le))) D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << TripleStr; else if (S.consumeInteger(10, Size) || diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2994223..b505492 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -156,6 +156,9 @@ static bool useLeafFramePointerForTargetByDefault(const llvm::Triple &Triple) { (Triple.isAndroid() && !Triple.isARM())) return false; + if ((Triple.isARM() || Triple.isThumb()) && Triple.isOSBinFormatMachO()) + return false; + return true; } @@ -3513,8 +3516,9 @@ std::string tools::complexRangeKindToStr(LangOptions::ComplexRangeKind Range) { case LangOptions::ComplexRangeKind::CX_Promoted: return "promoted"; break; - default: - return ""; + case LangOptions::ComplexRangeKind::CX_None: + return 
"none"; + break; } } diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp index 559af32..f4858e4 100644 --- a/clang/lib/Driver/ToolChains/HLSL.cpp +++ b/clang/lib/Driver/ToolChains/HLSL.cpp @@ -62,11 +62,15 @@ bool isLegalShaderModel(Triple &T) { VersionTuple MinVer(6, 5); return MinVer <= Version; } break; + case Triple::EnvironmentType::RootSignature: + VersionTuple MinVer(1, 0); + VersionTuple MaxVer(1, 1); + return MinVer <= Version && Version <= MaxVer; } return false; } -std::optional<std::string> tryParseProfile(StringRef Profile) { +std::optional<llvm::Triple> tryParseTriple(StringRef Profile) { // [ps|vs|gs|hs|ds|cs|ms|as]_[major]_[minor] SmallVector<StringRef, 3> Parts; Profile.split(Parts, "_"); @@ -84,6 +88,7 @@ std::optional<std::string> tryParseProfile(StringRef Profile) { .Case("lib", Triple::EnvironmentType::Library) .Case("ms", Triple::EnvironmentType::Mesh) .Case("as", Triple::EnvironmentType::Amplification) + .Case("rootsig", Triple::EnvironmentType::RootSignature) .Default(Triple::EnvironmentType::UnknownEnvironment); if (Kind == Triple::EnvironmentType::UnknownEnvironment) return std::nullopt; @@ -147,8 +152,14 @@ std::optional<std::string> tryParseProfile(StringRef Profile) { T.setOSName(Triple::getOSTypeName(Triple::OSType::ShaderModel).str() + VersionTuple(Major, Minor).getAsString()); T.setEnvironment(Kind); - if (isLegalShaderModel(T)) - return T.getTriple(); + + return T; +} + +std::optional<std::string> tryParseProfile(StringRef Profile) { + std::optional<llvm::Triple> MaybeT = tryParseTriple(Profile); + if (MaybeT && isLegalShaderModel(*MaybeT)) + return MaybeT->getTriple(); else return std::nullopt; } @@ -258,6 +269,19 @@ bool checkExtensionArgsAreValid(ArrayRef<std::string> SpvExtensionArgs, } return AllValid; } + +bool isRootSignatureTarget(StringRef Profile) { + if (std::optional<llvm::Triple> T = tryParseTriple(Profile)) + return T->getEnvironment() == Triple::EnvironmentType::RootSignature; + 
return false; +} + +bool isRootSignatureTarget(DerivedArgList &Args) { + if (const Arg *A = Args.getLastArg(options::OPT_target_profile)) + return isRootSignatureTarget(A->getValue()); + return false; +} + } // namespace void tools::hlsl::Validator::ConstructJob(Compilation &C, const JobAction &JA, @@ -313,10 +337,22 @@ void tools::hlsl::LLVMObjcopy::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Output.getFilename()); if (Args.hasArg(options::OPT_dxc_strip_rootsignature)) { - const char *Frs = Args.MakeArgString("--remove-section=RTS0"); + const char *StripRS = Args.MakeArgString("--remove-section=RTS0"); + CmdArgs.push_back(StripRS); + } + + if (Arg *Arg = Args.getLastArg(options::OPT_dxc_Frs)) { + const char *Frs = + Args.MakeArgString("--extract-section=RTS0=" + Twine(Arg->getValue())); CmdArgs.push_back(Frs); } + if (const Arg *A = Args.getLastArg(options::OPT_target_profile)) + if (isRootSignatureTarget(A->getValue())) { + const char *Fos = Args.MakeArgString("--only-section=RTS0"); + CmdArgs.push_back(Fos); + } + assert(CmdArgs.size() > 2 && "Unnecessary invocation of objcopy."); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), @@ -493,7 +529,8 @@ bool HLSLToolChain::requiresBinaryTranslation(DerivedArgList &Args) const { bool HLSLToolChain::requiresObjcopy(DerivedArgList &Args) const { return Args.hasArg(options::OPT_dxc_Fo) && - Args.hasArg(options::OPT_dxc_strip_rootsignature); + (Args.hasArg(options::OPT_dxc_strip_rootsignature) || + Args.hasArg(options::OPT_dxc_Frs) || isRootSignatureTarget(Args)); } bool HLSLToolChain::isLastJob(DerivedArgList &Args, diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 8d1e9d6..31a8d75 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -554,9 +554,16 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { } std::string 
CompilerInstance::getSpecificModuleCachePath(StringRef ModuleHash) { + assert(FileMgr && "Specific module cache path requires a FileManager"); + + if (getHeaderSearchOpts().ModuleCachePath.empty()) + return ""; + // Set up the module path, including the hash for the module-creation options. - SmallString<256> SpecificModuleCache(getHeaderSearchOpts().ModuleCachePath); - if (!SpecificModuleCache.empty() && !getHeaderSearchOpts().DisableModuleHash) + SmallString<256> SpecificModuleCache; + normalizeModuleCachePath(*FileMgr, getHeaderSearchOpts().ModuleCachePath, + SpecificModuleCache); + if (!getHeaderSearchOpts().DisableModuleHash) llvm::sys::path::append(SpecificModuleCache, ModuleHash); return std::string(SpecificModuleCache); } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 8411d00..931766d 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3315,9 +3315,6 @@ static void GenerateHeaderSearchArgs(const HeaderSearchOptions &Opts, if (Opts.UseLibcxx) GenerateArg(Consumer, OPT_stdlib_EQ, "libc++"); - if (!Opts.ModuleCachePath.empty()) - GenerateArg(Consumer, OPT_fmodules_cache_path, Opts.ModuleCachePath); - for (const auto &File : Opts.PrebuiltModuleFiles) GenerateArg(Consumer, OPT_fmodule_file, File.first + "=" + File.second); @@ -3420,8 +3417,7 @@ static void GenerateHeaderSearchArgs(const HeaderSearchOptions &Opts, } static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args, - DiagnosticsEngine &Diags, - const std::string &WorkingDir) { + DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); HeaderSearchOptions *HeaderSearchOpts = &Opts; @@ -3434,17 +3430,6 @@ static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args, if (const Arg *A = Args.getLastArg(OPT_stdlib_EQ)) Opts.UseLibcxx = (strcmp(A->getValue(), "libc++") == 0); - // Canonicalize -fmodules-cache-path before storing it. 
- SmallString<128> P(Args.getLastArgValue(OPT_fmodules_cache_path)); - if (!(P.empty() || llvm::sys::path::is_absolute(P))) { - if (WorkingDir.empty()) - llvm::sys::fs::make_absolute(P); - else - llvm::sys::fs::make_absolute(WorkingDir, P); - } - llvm::sys::path::remove_dots(P); - Opts.ModuleCachePath = std::string(P); - // Only the -fmodule-file=<name>=<file> form. for (const auto *A : Args.filtered(OPT_fmodule_file)) { StringRef Val = A->getValue(); @@ -5021,8 +5006,7 @@ bool CompilerInvocation::CreateFromArgsImpl( InputKind DashX = Res.getFrontendOpts().DashX; ParseTargetArgs(Res.getTargetOpts(), Args, Diags); llvm::Triple T(Res.getTargetOpts().Triple); - ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args, Diags, - Res.getFileSystemOpts().WorkingDir); + ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args, Diags); if (Res.getFrontendOpts().GenReducedBMI || Res.getFrontendOpts().ProgramAction == frontend::GenerateReducedModuleInterface || diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index f183184..7424958 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -1310,16 +1310,27 @@ void HLSLFrontendAction::ExecuteAction() { /*CodeCompleteConsumer=*/nullptr); Sema &S = CI.getSema(); + auto &TargetInfo = CI.getASTContext().getTargetInfo(); + bool IsRootSignatureTarget = + TargetInfo.getTriple().getEnvironment() == llvm::Triple::RootSignature; + StringRef HLSLEntry = TargetInfo.getTargetOpts().HLSLEntry; + // Register HLSL specific callbacks auto LangOpts = CI.getLangOpts(); + StringRef RootSigName = + IsRootSignatureTarget ? 
HLSLEntry : LangOpts.HLSLRootSigOverride; + auto MacroCallback = std::make_unique<InjectRootSignatureCallback>( - S, LangOpts.HLSLRootSigOverride, LangOpts.HLSLRootSigVer); + S, RootSigName, LangOpts.HLSLRootSigVer); Preprocessor &PP = CI.getPreprocessor(); PP.addPPCallbacks(std::move(MacroCallback)); - // Invoke as normal - WrapperFrontendAction::ExecuteAction(); + // If we are targeting a root signature, invoke custom handling + if (IsRootSignatureTarget) + return hlsl::HandleRootSignatureTarget(S, HLSLEntry); + else // otherwise, invoke as normal + return WrapperFrontendAction::ExecuteAction(); } HLSLFrontendAction::HLSLFrontendAction( diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 2cacdc3..fc12a9b 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -496,10 +496,9 @@ _mm256_andnot_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_avg_epu8(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_avg_epu8(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_pavgb256((__v32qu)__a, (__v32qu)__b); } /// Computes the averages of the corresponding unsigned 16-bit integers in @@ -522,10 +521,9 @@ _mm256_avg_epu8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_avg_epu16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_avg_epu16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_pavgw256((__v16hu)__a, (__v16hu)__b); } /// Merges 8-bit integer values from either of the two 256-bit vectors diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 31e0a22..42fce7d 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -690,50 +690,40 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_avg_epu8 (__m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_avg_epu8(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_pavgb512((__v64qu)__A, (__v64qu)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, - __m512i __B) -{ - return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, - (__v64qi)_mm512_avg_epu8(__A, __B), - (__v64qi)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, - (__v64qi)_mm512_avg_epu8(__A, __B), - (__v64qi)_mm512_setzero_si512()); + (__v64qi)_mm512_avg_epu8(__A, __B), + 
(__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_avg_epu16 (__m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_avg_epu16(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_pavgw512((__v32hu)__A, (__v32hu)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, - __m512i __B) -{ - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_avg_epu16(__A, __B), - (__v32hi)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_avg_epu16(__A, __B), - (__v32hi) _mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR @@ -1407,13 +1397,13 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) (imm)), \ (__v32hi)_mm512_setzero_si512())) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i 
__B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1421,7 +1411,7 @@ _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1474,13 +1464,13 @@ _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) { #define _mm512_bslli_epi128(a, imm) \ ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1488,7 +1478,7 @@ _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1496,13 +1486,13 @@ _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1510,7 +1500,7 @@ _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 67499fd8..7ba0903 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -4731,13 +4731,13 @@ _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -4745,7 +4745,7 @@ _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -4753,13 +4753,13 @@ _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i 
__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -4767,7 +4767,7 @@ _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -4843,13 +4843,13 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_rol_epi64((a), (b)), \ (__v8di)_mm512_setzero_si512())) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -4857,7 +4857,7 @@ _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -4865,13 +4865,13 @@ _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 
__U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -4879,7 +4879,7 @@ _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -5517,13 +5517,13 @@ _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5531,7 +5531,7 @@ _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5602,13 +5602,13 @@ _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5616,7 +5616,7 @@ 
_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5687,13 +5687,13 @@ _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64 (__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5701,7 +5701,7 @@ _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h index 0120533..a24b6e59 100644 --- a/clang/lib/Headers/avx512vbmi2intrin.h +++ b/clang/lib/Headers/avx512vbmi2intrin.h @@ -19,6 +19,12 @@ __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), \ __min_vector_width__(512))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) { @@ -213,14 +219,14 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) 
(__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ (__v32hi)_mm512_setzero_si512())) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__B, (__v8du)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -228,7 +234,7 @@ _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) (__v8di)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -236,14 +242,14 @@ _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__B, (__v16su)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -251,7 +257,7 @@ _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) (__v16si)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -259,14 +265,14 @@ _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) (__v16si)_mm512_setzero_si512()); } 
-static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_elementwise_fshl((__v32hu)__A, (__v32hu)__B, (__v32hu)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, @@ -274,7 +280,7 @@ _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, @@ -282,7 +288,7 @@ _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) { // Ops __A and __B are swapped. 
@@ -290,7 +296,7 @@ _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) (__v8du)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -298,7 +304,7 @@ _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) (__v8di)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -306,7 +312,7 @@ _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) { // Ops __A and __B are swapped. @@ -314,7 +320,7 @@ _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) (__v16su)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { return (__m512i) __builtin_ia32_selectd_512(__U, @@ -322,7 +328,7 @@ _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) (__v16si)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i) __builtin_ia32_selectd_512(__U, @@ -330,7 +336,7 @@ _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) { // Ops __A and __B are swapped. 
@@ -338,7 +344,7 @@ _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) (__v32hu)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, @@ -346,7 +352,7 @@ _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, @@ -356,6 +362,7 @@ _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 846cda67..6e3efa7 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -795,68 +795,56 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, - (__v16qi)_mm_avg_epu8(__A, __B), - (__v16qi)__W); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), 
(__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, - (__v32qi)_mm256_avg_epu8(__A, __B), - (__v32qi)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, - (__v8hi)_mm_avg_epu16(__A, __B), - (__v8hi)__W); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectw_128( + (__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_avg_epu16(__A, __B), - (__v16hi)__W); +static 
__inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_avg_epu16(__A, __B), - (__v16hi)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), + (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR @@ -1829,13 +1817,13 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) (imm)), \ (__v16hi)_mm256_setzero_si256())) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1843,7 +1831,7 @@ _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1851,13 +1839,13 @@ _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1865,7 +1853,7 @@ _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1936,13 +1924,13 @@ _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) { (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1950,7 +1938,7 @@ _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1958,13 +1946,13 @@ _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi16(__m128i __A, __m128i __B) { return 
(__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1972,7 +1960,7 @@ _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1980,13 +1968,13 @@ _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1994,7 +1982,7 @@ _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2002,13 +1990,13 @@ _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ 
__m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2016,7 +2004,7 @@ _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 47f45ec..d85ea23 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -4180,13 +4180,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4di)_mm256_rol_epi64((a), (b)), \ (__v4di)_mm256_setzero_si256())) -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rolv_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4194,7 +4194,7 @@ _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4202,13 +4202,13 @@ _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rolv_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_fshl((__v8su)__A, 
(__v8su)__A, (__v8su)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4216,7 +4216,7 @@ _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4224,13 +4224,13 @@ _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rolv_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4238,7 +4238,7 @@ _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4246,13 +4246,13 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rolv_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR 
_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4260,7 +4260,7 @@ _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4446,13 +4446,13 @@ _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) { (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rorv_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4460,7 +4460,7 @@ _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4468,13 +4468,13 @@ _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rorv_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4482,7 
+4482,7 @@ _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4490,13 +4490,13 @@ _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rorv_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4504,7 +4504,7 @@ _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4512,13 +4512,13 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rorv_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4526,7 +4526,7 @@ _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4820,13 +4820,13 @@ _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4834,7 +4834,7 @@ _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4842,13 +4842,13 @@ _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4856,7 +4856,7 @@ _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) { 
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h index 3d6e99b..da295d2 100644 --- a/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -24,6 +24,14 @@ __target__("avx512vl,avx512vbmi2"), \ __min_vector_width__(256))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { @@ -412,14 +420,14 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ (__v8hi)_mm_setzero_si128())) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__B, (__v4du)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -427,7 +435,7 @@ _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) (__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -435,14 +443,14 @@ _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__B, (__v2du)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -450,7 +458,7 @@ _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -458,14 +466,14 @@ _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__B, (__v8su)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -473,7 +481,7 @@ _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) (__v8si)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -481,14 +489,14 @@ _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR 
_mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__B, (__v4su)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -496,7 +504,7 @@ _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v4si)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -504,14 +512,14 @@ _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_elementwise_fshl((__v16hu)__A, (__v16hu)__B, (__v16hu)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, @@ -519,7 +527,7 @@ _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) (__v16hi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, @@ -527,14 +535,14 @@ _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) { return 
(__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__B, (__v8hu)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, @@ -542,7 +550,7 @@ _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v8hi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, @@ -550,7 +558,7 @@ _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) { // Ops __A and __B are swapped. @@ -558,7 +566,7 @@ _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) (__v4du)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -566,7 +574,7 @@ _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) (__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -574,7 +582,7 @@ _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) { // Ops __A and __B are swapped. 
@@ -582,7 +590,7 @@ _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) (__v2du)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -590,7 +598,7 @@ _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -598,7 +606,7 @@ _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) { // Ops __A and __B are swapped. @@ -606,7 +614,7 @@ _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) (__v8su)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -614,7 +622,7 @@ _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) (__v8si)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -622,7 +630,7 @@ _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) { // Ops __A and __B are 
swapped. @@ -630,7 +638,7 @@ _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) (__v4su)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -638,7 +646,7 @@ _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v4si)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -646,7 +654,7 @@ _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) { // Ops __A and __B are swapped. 
@@ -654,7 +662,7 @@ _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) (__v16hu)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, @@ -662,7 +670,7 @@ _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) (__v16hi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, @@ -670,7 +678,7 @@ _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) { // Ops __A and __B are swapped. 
@@ -678,7 +686,7 @@ _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) (__v8hu)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, @@ -686,7 +694,7 @@ _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v8hi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, @@ -694,8 +702,9 @@ _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v8hi)_mm_setzero_si128()); } - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h index bc76348..a1a0338 100644 --- a/clang/lib/Headers/avx512vlvnniintrin.h +++ b/clang/lib/Headers/avx512vlvnniintrin.h @@ -41,8 +41,8 @@ /// ENDFOR /// DST[MAX:256] := 0 /// \endcode -#define _mm256_dpbusd_epi32(S, A, B) \ - ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B))) +#define _mm256_dpbusd_epi32(S, A, B) \ + ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v32qu)(A), (__v32qi)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed @@ -61,8 +61,9 @@ /// ENDFOR /// DST[MAX:256] := 0 /// \endcode -#define _mm256_dpbusds_epi32(S, A, B) \ - ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B))) +#define _mm256_dpbusds_epi32(S, A, B) \ + ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v32qu)(A), \ + (__v32qi)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in 
\a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit @@ -117,8 +118,8 @@ /// ENDFOR /// DST[MAX:128] := 0 /// \endcode -#define _mm_dpbusd_epi32(S, A, B) \ - ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B))) +#define _mm_dpbusd_epi32(S, A, B) \ + ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v16qu)(A), (__v16qi)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed @@ -137,8 +138,9 @@ /// ENDFOR /// DST[MAX:128] := 0 /// \endcode -#define _mm_dpbusds_epi32(S, A, B) \ - ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B))) +#define _mm_dpbusds_epi32(S, A, B) \ + ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v16qu)(A), \ + (__v16qi)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit diff --git a/clang/lib/Headers/avx512vnniintrin.h b/clang/lib/Headers/avx512vnniintrin.h index 0b722af..c386923 100644 --- a/clang/lib/Headers/avx512vnniintrin.h +++ b/clang/lib/Headers/avx512vnniintrin.h @@ -22,8 +22,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v64qu)__A, + (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -45,8 +45,8 @@ _mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v64qu)__A, + (__v64qi)__B); } static __inline__ __m512i 
__DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index e9d8be3..a7f7099 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -4822,9 +4822,8 @@ _mm256_zextsi128_si256(__m128i __a) { /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_set_m128 (__m128 __hi, __m128 __lo) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_set_m128(__m128 __hi, __m128 __lo) { return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7); } @@ -4843,9 +4842,8 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo) /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_set_m128d (__m128d __hi, __m128d __lo) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_set_m128d(__m128d __hi, __m128d __lo) { return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3); } @@ -4863,9 +4861,8 @@ _mm256_set_m128d (__m128d __hi, __m128d __lo) /// A 128-bit integer vector to be copied to the lower 128 bits of the /// result. /// \returns A 256-bit integer vector containing the concatenated result. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_set_m128i (__m128i __hi, __m128i __lo) -{ +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_set_m128i(__m128i __hi, __m128i __lo) { return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3); } @@ -4886,9 +4883,8 @@ _mm256_set_m128i (__m128i __hi, __m128i __lo) /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_setr_m128 (__m128 __lo, __m128 __hi) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_setr_m128(__m128 __lo, __m128 __hi) { return _mm256_set_m128(__hi, __lo); } @@ -4909,9 +4905,8 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi) /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_setr_m128d (__m128d __lo, __m128d __hi) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_setr_m128d(__m128d __lo, __m128d __hi) { return (__m256d)_mm256_set_m128d(__hi, __lo); } @@ -4930,9 +4925,8 @@ _mm256_setr_m128d (__m128d __lo, __m128d __hi) /// A 128-bit integer vector to be copied to the upper 128 bits of the /// result. /// \returns A 256-bit integer vector containing the concatenated result. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_setr_m128i (__m128i __lo, __m128i __hi) -{ +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_setr_m128i(__m128i __lo, __m128i __hi) { return (__m256i)_mm256_set_m128i(__hi, __lo); } diff --git a/clang/lib/Headers/avxvnniintrin.h b/clang/lib/Headers/avxvnniintrin.h index b7de562..3c4c44a 100644 --- a/clang/lib/Headers/avxvnniintrin.h +++ b/clang/lib/Headers/avxvnniintrin.h @@ -63,7 +63,8 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B); + return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v32qu)__A, + (__v32qi)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with @@ -86,7 +87,8 @@ _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, 
(__v8si)__B); + return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v32qu)__A, + (__v32qi)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with @@ -151,7 +153,8 @@ _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v16qu)__A, + (__v16qi)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with @@ -174,7 +177,8 @@ _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v16qu)__A, + (__v16qi)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h index ce8c79e..45700c6 100644 --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -348,7 +348,7 @@ static __inline int __get_cpuid_count (unsigned int __leaf, // In some cases, offloading will set the host as the aux triple and define the // builtin. Given __has_builtin does not detect builtins on aux triples, we need // to explicitly check for some offloading cases. 
-#ifndef __NVPTX__ +#if !defined(__NVPTX__) && !defined(__AMDGPU__) && !defined(__SPIRV__) static __inline void __cpuidex(int __cpu_info[4], int __leaf, int __subleaf) { __cpuid_count(__leaf, __subleaf, __cpu_info[0], __cpu_info[1], __cpu_info[2], __cpu_info[3]); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index a366e0d..e4fbe01 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -27,7 +27,6 @@ typedef double __v2df __attribute__((__vector_size__(16))); /* Unsigned types */ typedef unsigned long long __v2du __attribute__((__vector_size__(16))); -typedef unsigned char __v16qu __attribute__((__vector_size__(16))); /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. */ @@ -2247,9 +2246,9 @@ _mm_adds_epu16(__m128i __a, __m128i __b) { /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded /// averages of both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, - __m128i __b) { - return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_avg_epu8(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pavgb128((__v16qu)__a, (__v16qu)__b); } /// Computes the rounded averages of corresponding elements of two @@ -2266,9 +2265,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded /// averages of both parameters. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, - __m128i __b) { - return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_avg_epu16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pavgw128((__v8hu)__a, (__v8hu)__b); } /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 4b52904..6b70f24 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -24,6 +24,7 @@ typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1))); /* Unsigned types */ typedef unsigned int __v4su __attribute__((__vector_size__(16))); typedef unsigned short __v8hu __attribute__((__vector_size__(16))); +typedef unsigned char __v16qu __attribute__((__vector_size__(16))); /* This header should only be included in a hosted environment as it depends on * a standard library to provide allocation routines. */ @@ -2539,11 +2540,10 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_avg_pu8(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_avg_pu8(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pavgb128((__v16qu)__zext128(__a), + (__v16qu)__zext128(__b))); } /// Computes the rounded averages of the packed unsigned 16-bit integer @@ -2559,11 +2559,10 @@ _mm_avg_pu8(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_avg_pu16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_avg_pu16(__m64 __a, __m64 __b) { + return __trunc64( + __builtin_ia32_pavgw128((__v8hu)__zext128(__a), (__v8hu)__zext128(__b))); } /// Subtracts the corresponding 8-bit unsigned integer values of the two diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h index 7015719..aba632f 100644 --- a/clang/lib/Headers/xopintrin.h +++ b/clang/lib/Headers/xopintrin.h @@ -208,25 +208,25 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_rot_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_rot_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_rot_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_rot_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B); diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index ea2391f..f28a74f 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -2183,3 +2183,10 @@ std::string HeaderSearch::suggestPathToFileForDiagnostics( } return 
path::convert_to_slash(Filename); } + +void clang::normalizeModuleCachePath(FileManager &FileMgr, StringRef Path, + SmallVectorImpl<char> &NormalizedPath) { + NormalizedPath.assign(Path.begin(), Path.end()); + FileMgr.makeAbsolutePath(NormalizedPath); + llvm::sys::path::remove_dots(NormalizedPath); +} diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 9d01b8d..360593d 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -3746,6 +3746,8 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) { if (Result.isNegative()) { Diag(CurTok, diag::err_requires_positive_value) << toString(Result, 10) << /*positive*/ 0; + if (CurTok.isNot(tok::eod)) + DiscardUntilEndOfDirective(CurTok); return std::nullopt; } return Result.getLimitedValue(); diff --git a/clang/lib/Parse/ParseHLSL.cpp b/clang/lib/Parse/ParseHLSL.cpp index f243b0c..51f2aef 100644 --- a/clang/lib/Parse/ParseHLSL.cpp +++ b/clang/lib/Parse/ParseHLSL.cpp @@ -118,6 +118,46 @@ static void fixSeparateAttrArgAndNumber(StringRef ArgStr, SourceLocation ArgLoc, Slot = new (Ctx) IdentifierLoc(ArgLoc, PP.getIdentifierInfo(FixedArg)); } +Parser::ParsedSemantic Parser::ParseHLSLSemantic() { + assert(Tok.is(tok::identifier) && "Not a HLSL Annotation"); + + // Semantic pattern: [A-Za-z_]([A-Za-z_0-9]*[A-Za-z_])?[0-9]* + // The first part is the semantic name, the second is the optional + // semantic index. The semantic index is the number at the end of + // the semantic, including leading zeroes. Digits located before + // the last letter are part of the semantic name. + bool Invalid = false; + SmallString<256> Buffer; + Buffer.resize(Tok.getLength() + 1); + StringRef Identifier = PP.getSpelling(Tok, Buffer); + if (Invalid) { + Diag(Tok.getLocation(), diag::err_expected_semantic_identifier); + return {}; + } + + assert(Identifier.size() > 0); + // Determine the start of the semantic index. 
+ unsigned IndexIndex = Identifier.find_last_not_of("0123456789") + 1; + + // ParseHLSLSemantic being called on an indentifier, the first + // character cannot be a digit. This error should be handled by + // the caller. We can assert here. + StringRef SemanticName = Identifier.take_front(IndexIndex); + assert(SemanticName.size() > 0); + + unsigned Index = 0; + bool Explicit = false; + if (IndexIndex != Identifier.size()) { + Explicit = true; + [[maybe_unused]] bool Failure = + Identifier.substr(IndexIndex).getAsInteger(10, Index); + // Given the logic above, this should never fail. + assert(!Failure); + } + + return {SemanticName, Index, Explicit}; +} + void Parser::ParseHLSLAnnotations(ParsedAttributes &Attrs, SourceLocation *EndLoc, bool CouldBeBitField) { @@ -141,11 +181,15 @@ void Parser::ParseHLSLAnnotations(ParsedAttributes &Attrs, return; } + ParsedAttr::Kind AttrKind = + ParsedAttr::getParsedKind(II, nullptr, ParsedAttr::AS_HLSLAnnotation); + Parser::ParsedSemantic Semantic; + if (AttrKind == ParsedAttr::AT_HLSLUnparsedSemantic) + Semantic = ParseHLSLSemantic(); + SourceLocation Loc = ConsumeToken(); if (EndLoc) *EndLoc = Tok.getLocation(); - ParsedAttr::Kind AttrKind = - ParsedAttr::getParsedKind(II, nullptr, ParsedAttr::AS_HLSLAnnotation); ArgsVector ArgExprs; switch (AttrKind) { @@ -282,14 +326,17 @@ void Parser::ParseHLSLAnnotations(ParsedAttributes &Attrs, return; } } break; - case ParsedAttr::UnknownAttribute: - Diag(Loc, diag::err_unknown_hlsl_semantic) << II; - return; - case ParsedAttr::AT_HLSLSV_GroupThreadID: - case ParsedAttr::AT_HLSLSV_GroupID: - case ParsedAttr::AT_HLSLSV_GroupIndex: - case ParsedAttr::AT_HLSLSV_DispatchThreadID: - case ParsedAttr::AT_HLSLSV_Position: + case ParsedAttr::AT_HLSLUnparsedSemantic: { + ASTContext &Ctx = Actions.getASTContext(); + ArgExprs.push_back(IntegerLiteral::Create( + Ctx, llvm::APInt(Ctx.getTypeSize(Ctx.IntTy), Semantic.Index), Ctx.IntTy, + SourceLocation())); + ArgExprs.push_back(IntegerLiteral::Create( + 
Ctx, llvm::APInt(1, Semantic.Explicit), Ctx.BoolTy, SourceLocation())); + II = PP.getIdentifierInfo(Semantic.Name); + break; + } + case ParsedAttr::UnknownAttribute: // FIXME: maybe this is obsolete? break; default: llvm_unreachable("invalid HLSL Annotation"); diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index 1af72f8..3b16efb 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -7,8 +7,9 @@ //===----------------------------------------------------------------------===// #include "clang/Parse/ParseHLSLRootSignature.h" - +#include "clang/AST/ASTConsumer.h" #include "clang/Lex/LiteralSupport.h" +#include "clang/Parse/Parser.h" #include "clang/Sema/Sema.h" using namespace llvm::hlsl::rootsig; @@ -37,8 +38,18 @@ bool RootSignatureParser::parse() { // Iterate as many RootSignatureElements as possible, until we hit the // end of the stream bool HadError = false; + bool HasRootFlags = false; while (!peekExpectedToken(TokenKind::end_of_stream)) { if (tryConsumeExpectedToken(TokenKind::kw_RootFlags)) { + if (HasRootFlags) { + reportDiag(diag::err_hlsl_rootsig_repeat_param) + << TokenKind::kw_RootFlags; + HadError = true; + skipUntilExpectedToken(RootElementKeywords); + continue; + } + HasRootFlags = true; + SourceLocation ElementLoc = getTokenLocation(CurToken); auto Flags = parseRootFlags(); if (!Flags.has_value()) { @@ -1472,5 +1483,38 @@ IdentifierInfo *ParseHLSLRootSignature(Sema &Actions, return DeclIdent; } +void HandleRootSignatureTarget(Sema &S, StringRef EntryRootSig) { + ASTConsumer *Consumer = &S.getASTConsumer(); + + // Minimally initalize the parser. 
This does a couple things: + // - initializes Sema scope handling + // - invokes HLSLExternalSemaSource + // - invokes the preprocessor to lex the macros in the file + std::unique_ptr<Parser> P(new Parser(S.getPreprocessor(), S, true)); + S.getPreprocessor().EnterMainSourceFile(); + + bool HaveLexer = S.getPreprocessor().getCurrentLexer(); + if (HaveLexer) { + P->Initialize(); + S.ActOnStartOfTranslationUnit(); + + // Skim through the file to parse to find the define + while (P->getCurToken().getKind() != tok::eof) + P->ConsumeAnyToken(); + + HLSLRootSignatureDecl *SignatureDecl = + S.HLSL().lookupRootSignatureOverrideDecl( + S.getASTContext().getTranslationUnitDecl()); + + if (SignatureDecl) + Consumer->HandleTopLevelDecl(DeclGroupRef(SignatureDecl)); + else + S.getDiagnostics().Report(diag::err_hlsl_rootsignature_entry) + << EntryRootSig; + } + + Consumer->HandleTranslationUnit(S.getASTContext()); +} + } // namespace hlsl } // namespace clang diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp index 4833856..d434ddb 100644 --- a/clang/lib/Parse/ParseStmtAsm.cpp +++ b/clang/lib/Parse/ParseStmtAsm.cpp @@ -509,13 +509,12 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { // We need an actual supported target. 
const llvm::Triple &TheTriple = Actions.Context.getTargetInfo().getTriple(); - const std::string &TT = TheTriple.getTriple(); const llvm::Target *TheTarget = nullptr; if (!TheTriple.isX86()) { Diag(AsmLoc, diag::err_msasm_unsupported_arch) << TheTriple.getArchName(); } else { std::string Error; - TheTarget = llvm::TargetRegistry::lookupTarget(TT, Error); + TheTarget = llvm::TargetRegistry::lookupTarget(TheTriple, Error); if (!TheTarget) Diag(AsmLoc, diag::err_msasm_unable_to_create_target) << Error; } diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp index b8591b0..ecf9cfd 100644 --- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp +++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/Specifiers.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaHLSL.h" @@ -102,6 +103,13 @@ private: : NameII(NameII), Ty(Ty), Modifier(Modifier) {} }; + struct LocalVar { + StringRef Name; + QualType Ty; + VarDecl *Decl; + LocalVar(StringRef Name, QualType Ty) : Name(Name), Ty(Ty), Decl(nullptr) {} + }; + BuiltinTypeDeclBuilder &DeclBuilder; DeclarationName Name; QualType ReturnTy; @@ -110,6 +118,7 @@ private: CXXMethodDecl *Method; bool IsConst; bool IsCtor; + StorageClass SC; llvm::SmallVector<Param> Params; llvm::SmallVector<Stmt *> StmtsList; @@ -123,6 +132,7 @@ private: enum class PlaceHolder { _0, _1, _2, _3, _4, Handle = 128, LastStmt }; Expr *convertPlaceholder(PlaceHolder PH); + Expr *convertPlaceholder(LocalVar &Var); Expr *convertPlaceholder(Expr *E) { return E; } public: @@ -130,13 +140,13 @@ public: BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, DeclarationName &Name, QualType ReturnTy, bool IsConst = false, - bool IsCtor = false) + bool IsCtor = false, StorageClass SC = SC_None) : DeclBuilder(DB), Name(Name), ReturnTy(ReturnTy), 
Method(nullptr), - IsConst(IsConst), IsCtor(IsCtor) {} + IsConst(IsConst), IsCtor(IsCtor), SC(SC) {} BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, StringRef NameStr, QualType ReturnTy, bool IsConst = false, - bool IsCtor = false); + bool IsCtor = false, StorageClass SC = SC_None); BuiltinTypeMethodBuilder(const BuiltinTypeMethodBuilder &Other) = delete; ~BuiltinTypeMethodBuilder() { finalize(); } @@ -147,18 +157,22 @@ public: BuiltinTypeMethodBuilder &addParam(StringRef Name, QualType Ty, HLSLParamModifierAttr::Spelling Modifier = HLSLParamModifierAttr::Keyword_in); + BuiltinTypeMethodBuilder &declareLocalVar(LocalVar &Var); template <typename... Ts> BuiltinTypeMethodBuilder &callBuiltin(StringRef BuiltinName, QualType ReturnType, Ts... ArgSpecs); template <typename TLHS, typename TRHS> BuiltinTypeMethodBuilder &assign(TLHS LHS, TRHS RHS); template <typename T> BuiltinTypeMethodBuilder &dereference(T Ptr); - BuiltinTypeDeclBuilder &finalize(); - Expr *getResourceHandleExpr(); - template <typename T> - BuiltinTypeMethodBuilder &getResourceHandle(T ResourceRecord); + BuiltinTypeMethodBuilder &accessHandleFieldOnResource(T ResourceRecord); + template <typename ResourceT, typename ValueT> + BuiltinTypeMethodBuilder &setHandleFieldOnResource(ResourceT ResourceRecord, + ValueT HandleValue); + template <typename T> BuiltinTypeMethodBuilder &returnValue(T ReturnValue); BuiltinTypeMethodBuilder &returnThis(); + BuiltinTypeDeclBuilder &finalize(); + Expr *getResourceHandleExpr(); private: void createDecl(); @@ -339,12 +353,22 @@ Expr *BuiltinTypeMethodBuilder::convertPlaceholder(PlaceHolder PH) { ParamDecl->getType().getNonReferenceType(), VK_PRValue); } +Expr *BuiltinTypeMethodBuilder::convertPlaceholder(LocalVar &Var) { + VarDecl *VD = Var.Decl; + assert(VD && "local variable is not declared"); + return DeclRefExpr::Create( + VD->getASTContext(), NestedNameSpecifierLoc(), SourceLocation(), VD, + false, DeclarationNameInfo(VD->getDeclName(), SourceLocation()), + 
VD->getType(), VK_LValue); +} + BuiltinTypeMethodBuilder::BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, StringRef NameStr, QualType ReturnTy, - bool IsConst, bool IsCtor) + bool IsConst, bool IsCtor, + StorageClass SC) : DeclBuilder(DB), ReturnTy(ReturnTy), Method(nullptr), IsConst(IsConst), - IsCtor(IsCtor) { + IsCtor(IsCtor), SC(SC) { assert((!NameStr.empty() || IsCtor) && "method needs a name"); assert(((IsCtor && !IsConst) || !IsCtor) && "constructor cannot be const"); @@ -394,10 +418,9 @@ void BuiltinTypeMethodBuilder::createDecl() { ExplicitSpecifier(), false, true, false, ConstexprSpecKind::Unspecified); else - Method = - CXXMethodDecl::Create(AST, DeclBuilder.Record, SourceLocation(), - NameInfo, FuncTy, TSInfo, SC_None, false, false, - ConstexprSpecKind::Unspecified, SourceLocation()); + Method = CXXMethodDecl::Create( + AST, DeclBuilder.Record, SourceLocation(), NameInfo, FuncTy, TSInfo, SC, + false, false, ConstexprSpecKind::Unspecified, SourceLocation()); // create params & set them to the function prototype SmallVector<ParmVarDecl *> ParmDecls; @@ -435,19 +458,20 @@ Expr *BuiltinTypeMethodBuilder::getResourceHandleExpr() { OK_Ordinary); } -template <typename T> BuiltinTypeMethodBuilder & -BuiltinTypeMethodBuilder::getResourceHandle(T ResourceRecord) { +BuiltinTypeMethodBuilder::declareLocalVar(LocalVar &Var) { ensureCompleteDecl(); - Expr *ResourceExpr = convertPlaceholder(ResourceRecord); + assert(Var.Decl == nullptr && "local variable is already declared"); ASTContext &AST = DeclBuilder.SemaRef.getASTContext(); - FieldDecl *HandleField = DeclBuilder.getResourceHandleField(); - MemberExpr *HandleExpr = MemberExpr::CreateImplicit( - AST, ResourceExpr, /*IsArrow=*/false, HandleField, HandleField->getType(), - VK_LValue, OK_Ordinary); - StmtsList.push_back(HandleExpr); + Var.Decl = VarDecl::Create( + AST, Method, SourceLocation(), SourceLocation(), + &AST.Idents.get(Var.Name, tok::TokenKind::identifier), Var.Ty, + 
AST.getTrivialTypeSourceInfo(Var.Ty, SourceLocation()), SC_None); + DeclStmt *DS = new (AST) clang::DeclStmt(DeclGroupRef(Var.Decl), + SourceLocation(), SourceLocation()); + StmtsList.push_back(DS); return *this; } @@ -464,11 +488,11 @@ template <typename... Ts> BuiltinTypeMethodBuilder & BuiltinTypeMethodBuilder::callBuiltin(StringRef BuiltinName, QualType ReturnType, Ts... ArgSpecs) { + ensureCompleteDecl(); + std::array<Expr *, sizeof...(ArgSpecs)> Args{ convertPlaceholder(std::forward<Ts>(ArgSpecs))...}; - ensureCompleteDecl(); - ASTContext &AST = DeclBuilder.SemaRef.getASTContext(); FunctionDecl *FD = lookupBuiltinFunction(DeclBuilder.SemaRef, BuiltinName); DeclRefExpr *DRE = DeclRefExpr::Create( @@ -512,6 +536,55 @@ BuiltinTypeMethodBuilder &BuiltinTypeMethodBuilder::dereference(T Ptr) { return *this; } +template <typename T> +BuiltinTypeMethodBuilder & +BuiltinTypeMethodBuilder::accessHandleFieldOnResource(T ResourceRecord) { + ensureCompleteDecl(); + + Expr *ResourceExpr = convertPlaceholder(ResourceRecord); + + ASTContext &AST = DeclBuilder.SemaRef.getASTContext(); + FieldDecl *HandleField = DeclBuilder.getResourceHandleField(); + MemberExpr *HandleExpr = MemberExpr::CreateImplicit( + AST, ResourceExpr, false, HandleField, HandleField->getType(), VK_LValue, + OK_Ordinary); + StmtsList.push_back(HandleExpr); + return *this; +} + +template <typename ResourceT, typename ValueT> +BuiltinTypeMethodBuilder & +BuiltinTypeMethodBuilder::setHandleFieldOnResource(ResourceT ResourceRecord, + ValueT HandleValue) { + ensureCompleteDecl(); + + Expr *ResourceExpr = convertPlaceholder(ResourceRecord); + Expr *HandleValueExpr = convertPlaceholder(HandleValue); + + ASTContext &AST = DeclBuilder.SemaRef.getASTContext(); + FieldDecl *HandleField = DeclBuilder.getResourceHandleField(); + MemberExpr *HandleMemberExpr = MemberExpr::CreateImplicit( + AST, ResourceExpr, false, HandleField, HandleField->getType(), VK_LValue, + OK_Ordinary); + Stmt *AssignStmt = 
BinaryOperator::Create( + DeclBuilder.SemaRef.getASTContext(), HandleMemberExpr, HandleValueExpr, + BO_Assign, HandleMemberExpr->getType(), ExprValueKind::VK_PRValue, + ExprObjectKind::OK_Ordinary, SourceLocation(), FPOptionsOverride()); + StmtsList.push_back(AssignStmt); + return *this; +} + +template <typename T> +BuiltinTypeMethodBuilder &BuiltinTypeMethodBuilder::returnValue(T ReturnValue) { + ensureCompleteDecl(); + + Expr *ReturnValueExpr = convertPlaceholder(ReturnValue); + ASTContext &AST = DeclBuilder.SemaRef.getASTContext(); + StmtsList.push_back( + ReturnStmt::Create(AST, SourceLocation(), ReturnValueExpr, nullptr)); + return *this; +} + BuiltinTypeDeclBuilder &BuiltinTypeMethodBuilder::finalize() { assert(!DeclBuilder.Record->isCompleteDefinition() && "record is already complete"); @@ -539,7 +612,7 @@ BuiltinTypeDeclBuilder &BuiltinTypeMethodBuilder::finalize() { Method->setBody(CompoundStmt::Create(AST, StmtsList, FPOptionsOverride(), SourceLocation(), SourceLocation())); Method->setLexicalDeclContext(DeclBuilder.Record); - Method->setAccess(AccessSpecifier::AS_public); + Method->setAccess(AS_public); Method->addAttr(AlwaysInlineAttr::CreateImplicit( AST, SourceRange(), AlwaysInlineAttr::CXX11_clang_always_inline)); DeclBuilder.Record->addDecl(Method); @@ -705,6 +778,82 @@ BuiltinTypeDeclBuilder::addHandleConstructorFromImplicitBinding() { .finalize(); } +// Adds static method that initializes resource from binding: +// +// static Resource<T> __createFromBinding(unsigned registerNo, +// unsigned spaceNo, int range, +// unsigned index, const char *name) { +// Resource<T> tmp; +// tmp.__handle = __builtin_hlsl_resource_handlefrombinding( +// tmp.__handle, registerNo, spaceNo, +// range, index, name); +// return tmp; +// } +BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addCreateFromBinding() { + if (Record->isCompleteDefinition()) + return *this; + + using PH = BuiltinTypeMethodBuilder::PlaceHolder; + ASTContext &AST = SemaRef.getASTContext(); + 
QualType HandleType = getResourceHandleField()->getType(); + QualType RecordType = AST.getTypeDeclType(cast<TypeDecl>(Record)); + BuiltinTypeMethodBuilder::LocalVar TmpVar("tmp", RecordType); + + return BuiltinTypeMethodBuilder(*this, "__createFromBinding", RecordType, + false, false, SC_Static) + .addParam("registerNo", AST.UnsignedIntTy) + .addParam("spaceNo", AST.UnsignedIntTy) + .addParam("range", AST.IntTy) + .addParam("index", AST.UnsignedIntTy) + .addParam("name", AST.getPointerType(AST.CharTy.withConst())) + .declareLocalVar(TmpVar) + .accessHandleFieldOnResource(TmpVar) + .callBuiltin("__builtin_hlsl_resource_handlefrombinding", HandleType, + PH::LastStmt, PH::_0, PH::_1, PH::_2, PH::_3, PH::_4) + .setHandleFieldOnResource(TmpVar, PH::LastStmt) + .returnValue(TmpVar) + .finalize(); +} + +// Adds static method that initializes resource from binding: +// +// static Resource<T> __createFromImplicitBinding(unsigned orderId, +// unsigned spaceNo, int range, +// unsigned index, +// const char *name) { +// Resource<T> tmp; +// tmp.__handle = __builtin_hlsl_resource_handlefromimplicitbinding( +// tmp.__handle, spaceNo, +// range, index, orderId, name); +// return tmp; +// } +BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addCreateFromImplicitBinding() { + if (Record->isCompleteDefinition()) + return *this; + + using PH = BuiltinTypeMethodBuilder::PlaceHolder; + ASTContext &AST = SemaRef.getASTContext(); + QualType HandleType = getResourceHandleField()->getType(); + QualType RecordType = AST.getTypeDeclType(cast<TypeDecl>(Record)); + BuiltinTypeMethodBuilder::LocalVar TmpVar("tmp", RecordType); + + return BuiltinTypeMethodBuilder(*this, "__createFromImplicitBinding", + RecordType, false, false, SC_Static) + .addParam("orderId", AST.UnsignedIntTy) + .addParam("spaceNo", AST.UnsignedIntTy) + .addParam("range", AST.IntTy) + .addParam("index", AST.UnsignedIntTy) + .addParam("name", AST.getPointerType(AST.CharTy.withConst())) + .declareLocalVar(TmpVar) + 
.accessHandleFieldOnResource(TmpVar) + .callBuiltin("__builtin_hlsl_resource_handlefromimplicitbinding", + HandleType, PH::LastStmt, PH::_0, PH::_1, PH::_2, PH::_3, + PH::_4) + .setHandleFieldOnResource(TmpVar, PH::LastStmt) + .returnValue(TmpVar) + .finalize(); +} + BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addCopyConstructor() { if (Record->isCompleteDefinition()) return *this; @@ -719,7 +868,7 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addCopyConstructor() { return BuiltinTypeMethodBuilder(*this, /*Name=*/"", AST.VoidTy, /*IsConst=*/false, /*IsCtor=*/true) .addParam("other", ConstRecordRefType) - .getResourceHandle(PH::_0) + .accessHandleFieldOnResource(PH::_0) .assign(PH::Handle, PH::LastStmt) .finalize(); } @@ -738,7 +887,7 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addCopyAssignmentOperator() { DeclarationName Name = AST.DeclarationNames.getCXXOperatorName(OO_Equal); return BuiltinTypeMethodBuilder(*this, Name, RecordRefType) .addParam("other", ConstRecordRefType) - .getResourceHandle(PH::_0) + .accessHandleFieldOnResource(PH::_0) .assign(PH::Handle, PH::LastStmt) .returnThis() .finalize(); diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h index 4c4c208..b898417 100644 --- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h +++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h @@ -83,6 +83,10 @@ public: BuiltinTypeDeclBuilder &addCopyConstructor(); BuiltinTypeDeclBuilder &addCopyAssignmentOperator(); + // Static create methods + BuiltinTypeDeclBuilder &addCreateFromBinding(); + BuiltinTypeDeclBuilder &addCreateFromImplicitBinding(); + // Builtin types methods BuiltinTypeDeclBuilder &addLoadMethods(); BuiltinTypeDeclBuilder &addIncrementCounterMethod(); diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 8c893c0..3386d8d 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -134,6 +134,8 @@ 
static BuiltinTypeDeclBuilder setupBufferType(CXXRecordDecl *Decl, Sema &S, .addDefaultHandleConstructor() .addCopyConstructor() .addCopyAssignmentOperator() + .addCreateFromBinding() + .addCreateFromImplicitBinding() .addHandleConstructorFromBinding() .addHandleConstructorFromImplicitBinding(); } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 365ebb6..7c1459e 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -1653,6 +1653,17 @@ void Sema::FilterLookupForScope(LookupResult &R, DeclContext *Ctx, Scope *S, F.done(); } +static bool isImplicitInstantiation(NamedDecl *D) { + if (auto *VD = dyn_cast<VarDecl>(D)) + return VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation; + if (auto *FD = dyn_cast<FunctionDecl>(D)) + return FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation; + if (auto *RD = dyn_cast<CXXRecordDecl>(D)) + return RD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation; + + return false; +} + bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) { // [module.interface]p7: // A declaration is attached to a module as follows: @@ -1668,6 +1679,14 @@ bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) { return false; } + // Although we have questions for the module ownership of implicit + // instantiations, it should be sure that we shouldn't diagnose the + // redeclaration of incorrect module ownership for different implicit + // instantiations in different modules. We will diagnose the redeclaration of + // incorrect module ownership for the template itself. 
+ if (isImplicitInstantiation(New) || isImplicitInstantiation(Old)) + return false; + Module *NewM = New->getOwningModule(); Module *OldM = Old->getOwningModule(); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index cb2c132c..4490645 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7465,9 +7465,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_HLSLWaveSize: S.HLSL().handleWaveSizeAttr(D, AL); break; - case ParsedAttr::AT_HLSLSV_Position: - S.HLSL().handleSV_PositionAttr(D, AL); - break; case ParsedAttr::AT_HLSLVkExtBuiltinInput: S.HLSL().handleVkExtBuiltinInputAttr(D, AL); break; @@ -7477,21 +7474,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_HLSLVkBinding: S.HLSL().handleVkBindingAttr(D, AL); break; - case ParsedAttr::AT_HLSLSV_GroupThreadID: - S.HLSL().handleSV_GroupThreadIDAttr(D, AL); - break; - case ParsedAttr::AT_HLSLSV_GroupID: - S.HLSL().handleSV_GroupIDAttr(D, AL); - break; - case ParsedAttr::AT_HLSLSV_GroupIndex: - handleSimpleAttribute<HLSLSV_GroupIndexAttr>(S, D, AL); - break; case ParsedAttr::AT_HLSLGroupSharedAddressSpace: handleSimpleAttribute<HLSLGroupSharedAddressSpaceAttr>(S, D, AL); break; - case ParsedAttr::AT_HLSLSV_DispatchThreadID: - S.HLSL().handleSV_DispatchThreadIDAttr(D, AL); - break; case ParsedAttr::AT_HLSLPackOffset: S.HLSL().handlePackOffsetAttr(D, AL); break; @@ -7504,6 +7489,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_HLSLParamModifier: S.HLSL().handleParamModifierAttr(D, AL); break; + case ParsedAttr::AT_HLSLUnparsedSemantic: + S.HLSL().handleSemanticAttr(D, AL); + break; case ParsedAttr::AT_AbiTag: handleAbiTagAttr(S, D, AL); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 317b7ca..aba00dc 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6313,30 
+6313,38 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, unsigned i = 0; SmallVector<QualType, 8> OverloadParams; - for (QualType ParamType : FT->param_types()) { + { + // The lvalue conversions in this loop are only for type resolution and + // don't actually occur. + EnterExpressionEvaluationContext Unevaluated( + *Sema, Sema::ExpressionEvaluationContext::Unevaluated); + Sema::SFINAETrap Trap(*Sema, /*ForValidityCheck=*/true); - // Convert array arguments to pointer to simplify type lookup. - ExprResult ArgRes = - Sema->DefaultFunctionArrayLvalueConversion(ArgExprs[i++]); - if (ArgRes.isInvalid()) - return nullptr; - Expr *Arg = ArgRes.get(); - QualType ArgType = Arg->getType(); - if (!ParamType->isPointerType() || - ParamType->getPointeeType().hasAddressSpace() || - !ArgType->isPointerType() || - !ArgType->getPointeeType().hasAddressSpace() || - isPtrSizeAddressSpace(ArgType->getPointeeType().getAddressSpace())) { - OverloadParams.push_back(ParamType); - continue; - } + for (QualType ParamType : FT->param_types()) { - QualType PointeeType = ParamType->getPointeeType(); - NeedsNewDecl = true; - LangAS AS = ArgType->getPointeeType().getAddressSpace(); + // Convert array arguments to pointer to simplify type lookup. 
+ ExprResult ArgRes = + Sema->DefaultFunctionArrayLvalueConversion(ArgExprs[i++]); + if (ArgRes.isInvalid()) + return nullptr; + Expr *Arg = ArgRes.get(); + QualType ArgType = Arg->getType(); + if (!ParamType->isPointerType() || + ParamType->getPointeeType().hasAddressSpace() || + !ArgType->isPointerType() || + !ArgType->getPointeeType().hasAddressSpace() || + isPtrSizeAddressSpace(ArgType->getPointeeType().getAddressSpace())) { + OverloadParams.push_back(ParamType); + continue; + } - PointeeType = Context.getAddrSpaceQualType(PointeeType, AS); - OverloadParams.push_back(Context.getPointerType(PointeeType)); + QualType PointeeType = ParamType->getPointeeType(); + NeedsNewDecl = true; + LangAS AS = ArgType->getPointeeType().getAddressSpace(); + + PointeeType = Context.getAddrSpaceQualType(PointeeType, AS); + OverloadParams.push_back(Context.getPointerType(PointeeType)); + } } if (!NeedsNewDecl) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index fb8f131..0af3847 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -729,19 +729,15 @@ void SemaHLSL::ActOnTopLevelFunction(FunctionDecl *FD) { // If we have specified a root signature to override the entry function then // attach it now - if (RootSigOverrideIdent) { - LookupResult R(SemaRef, RootSigOverrideIdent, SourceLocation(), - Sema::LookupOrdinaryName); - if (SemaRef.LookupQualifiedName(R, FD->getDeclContext())) - if (auto *SignatureDecl = - dyn_cast<HLSLRootSignatureDecl>(R.getFoundDecl())) { - FD->dropAttr<RootSignatureAttr>(); - // We could look up the SourceRange of the macro here as well - AttributeCommonInfo AL(RootSigOverrideIdent, AttributeScopeInfo(), - SourceRange(), ParsedAttr::Form::Microsoft()); - FD->addAttr(::new (getASTContext()) RootSignatureAttr( - getASTContext(), AL, RootSigOverrideIdent, SignatureDecl)); - } + HLSLRootSignatureDecl *SignatureDecl = + lookupRootSignatureOverrideDecl(FD->getDeclContext()); + if (SignatureDecl) { + 
FD->dropAttr<RootSignatureAttr>(); + // We could look up the SourceRange of the macro here as well + AttributeCommonInfo AL(RootSigOverrideIdent, AttributeScopeInfo(), + SourceRange(), ParsedAttr::Form::Microsoft()); + FD->addAttr(::new (getASTContext()) RootSignatureAttr( + getASTContext(), AL, RootSigOverrideIdent, SignatureDecl)); } llvm::Triple::EnvironmentType Env = TargetInfo.getTriple().getEnvironment(); @@ -765,12 +761,34 @@ void SemaHLSL::ActOnTopLevelFunction(FunctionDecl *FD) { case llvm::Triple::UnknownEnvironment: case llvm::Triple::Library: break; + case llvm::Triple::RootSignature: + llvm_unreachable("rootsig environment has no functions"); default: llvm_unreachable("Unhandled environment in triple"); } } } +bool SemaHLSL::isSemanticValid(FunctionDecl *FD, DeclaratorDecl *D) { + const auto *AnnotationAttr = D->getAttr<HLSLAnnotationAttr>(); + if (AnnotationAttr) { + CheckSemanticAnnotation(FD, D, AnnotationAttr); + return true; + } + + const Type *T = D->getType()->getUnqualifiedDesugaredType(); + const RecordType *RT = dyn_cast<RecordType>(T); + if (!RT) + return false; + + const RecordDecl *RD = RT->getOriginalDecl(); + for (FieldDecl *Field : RD->fields()) { + if (!isSemanticValid(FD, Field)) + return false; + } + return true; +} + void SemaHLSL::CheckEntryPoint(FunctionDecl *FD) { const auto *ShaderAttr = FD->getAttr<HLSLShaderAttr>(); assert(ShaderAttr && "Entry point has no shader attribute"); @@ -827,16 +845,14 @@ void SemaHLSL::CheckEntryPoint(FunctionDecl *FD) { } } break; + case llvm::Triple::RootSignature: + llvm_unreachable("rootsig environment has no function entry point"); default: llvm_unreachable("Unhandled environment in triple"); } for (ParmVarDecl *Param : FD->parameters()) { - if (const auto *AnnotationAttr = Param->getAttr<HLSLAnnotationAttr>()) { - CheckSemanticAnnotation(FD, Param, AnnotationAttr); - } else { - // FIXME: Handle struct parameters where annotations are on struct fields. 
- // See: https://github.com/llvm/llvm-project/issues/57875 + if (!isSemanticValid(FD, Param)) { Diag(FD->getLocation(), diag::err_hlsl_missing_semantic_annotation); Diag(Param->getLocation(), diag::note_previous_decl) << Param; FD->setInvalidDecl(); @@ -1107,6 +1123,18 @@ void SemaHLSL::ActOnFinishRootSignatureDecl( SemaRef.PushOnScopeChains(SignatureDecl, SemaRef.getCurScope()); } +HLSLRootSignatureDecl * +SemaHLSL::lookupRootSignatureOverrideDecl(DeclContext *DC) const { + if (RootSigOverrideIdent) { + LookupResult R(SemaRef, RootSigOverrideIdent, SourceLocation(), + Sema::LookupOrdinaryName); + if (SemaRef.LookupQualifiedName(R, DC)) + return dyn_cast<HLSLRootSignatureDecl>(R.getFoundDecl()); + } + + return nullptr; +} + namespace { struct PerVisibilityBindingChecker { @@ -1283,9 +1311,8 @@ bool SemaHLSL::handleRootSignatureElements( ReportError(Loc, 1, 0xfffffffe); } - if (!llvm::hlsl::rootsig::verifyDescriptorRangeFlag( - Version, llvm::to_underlying(Clause->Type), - llvm::to_underlying(Clause->Flags))) + if (!llvm::hlsl::rootsig::verifyDescriptorRangeFlag(Version, Clause->Type, + Clause->Flags)) ReportFlagError(Loc); } } @@ -1331,12 +1358,48 @@ bool SemaHLSL::handleRootSignatureElements( std::get_if<llvm::hlsl::rootsig::DescriptorTable>(&Elem)) { assert(UnboundClauses.size() == Table->NumClauses && "Number of unbound elements must match the number of clauses"); + bool HasAnySampler = false; + bool HasAnyNonSampler = false; + uint32_t Offset = 0; for (const auto &[Clause, ClauseElem] : UnboundClauses) { - uint32_t LowerBound(Clause->Reg.Number); + SourceLocation Loc = ClauseElem->getLocation(); + if (Clause->Type == llvm::dxil::ResourceClass::Sampler) + HasAnySampler = true; + else + HasAnyNonSampler = true; + + if (HasAnySampler && HasAnyNonSampler) + Diag(Loc, diag::err_hlsl_invalid_mixed_resources); + // Relevant error will have already been reported above and needs to be - // fixed before we can conduct range analysis, so shortcut error return + // fixed 
before we can conduct further analysis, so shortcut error + // return if (Clause->NumDescriptors == 0) return true; + + if (Clause->Offset != + llvm::hlsl::rootsig::DescriptorTableOffsetAppend) { + // Manually specified the offset + Offset = Clause->Offset; + } + + uint64_t RangeBound = llvm::hlsl::rootsig::computeRangeBound( + Offset, Clause->NumDescriptors); + + if (!llvm::hlsl::rootsig::verifyBoundOffset(Offset)) { + // Trying to append onto unbound offset + Diag(Loc, diag::err_hlsl_appending_onto_unbound); + } else if (!llvm::hlsl::rootsig::verifyNoOverflowedOffset(RangeBound)) { + // Upper bound overflows maximum offset + Diag(Loc, diag::err_hlsl_offset_overflow) << Offset << RangeBound; + } + + Offset = RangeBound == llvm::hlsl::rootsig::NumDescriptorsUnbounded + ? uint32_t(RangeBound) + : uint32_t(RangeBound + 1); + + // Compute the register bounds and track resource binding + uint32_t LowerBound(Clause->Reg.Number); uint32_t UpperBound = Clause->NumDescriptors == ~0u ? ~0u : LowerBound + Clause->NumDescriptors - 1; @@ -1548,18 +1611,8 @@ bool SemaHLSL::diagnoseInputIDType(QualType T, const ParsedAttr &AL) { return true; } -void SemaHLSL::handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL) { - auto *VD = cast<ValueDecl>(D); - if (!diagnoseInputIDType(VD->getType(), AL)) - return; - - D->addAttr(::new (getASTContext()) - HLSLSV_DispatchThreadIDAttr(getASTContext(), AL)); -} - bool SemaHLSL::diagnosePositionType(QualType T, const ParsedAttr &AL) { const auto *VT = T->getAs<VectorType>(); - if (!T->hasFloatingRepresentation() || (VT && VT->getNumElements() > 4)) { Diag(AL.getLoc(), diag::err_hlsl_attr_invalid_type) << AL << "float/float1/float2/float3/float4"; @@ -1569,29 +1622,70 @@ bool SemaHLSL::diagnosePositionType(QualType T, const ParsedAttr &AL) { return true; } -void SemaHLSL::handleSV_PositionAttr(Decl *D, const ParsedAttr &AL) { - auto *VD = cast<ValueDecl>(D); - if (!diagnosePositionType(VD->getType(), AL)) - return; +void 
SemaHLSL::diagnoseSystemSemanticAttr(Decl *D, const ParsedAttr &AL, + std::optional<unsigned> Index) { + std::string SemanticName = AL.getAttrName()->getName().upper(); - D->addAttr(::new (getASTContext()) HLSLSV_PositionAttr(getASTContext(), AL)); -} - -void SemaHLSL::handleSV_GroupThreadIDAttr(Decl *D, const ParsedAttr &AL) { auto *VD = cast<ValueDecl>(D); - if (!diagnoseInputIDType(VD->getType(), AL)) - return; - - D->addAttr(::new (getASTContext()) - HLSLSV_GroupThreadIDAttr(getASTContext(), AL)); -} + QualType ValueType = VD->getType(); + if (auto *FD = dyn_cast<FunctionDecl>(D)) + ValueType = FD->getReturnType(); + + bool IsOutput = false; + if (HLSLParamModifierAttr *MA = D->getAttr<HLSLParamModifierAttr>()) { + if (MA->isOut()) { + IsOutput = true; + ValueType = cast<ReferenceType>(ValueType)->getPointeeType(); + } + } -void SemaHLSL::handleSV_GroupIDAttr(Decl *D, const ParsedAttr &AL) { - auto *VD = cast<ValueDecl>(D); - if (!diagnoseInputIDType(VD->getType(), AL)) + Attr *Attribute = nullptr; + if (SemanticName == "SV_DISPATCHTHREADID") { + diagnoseInputIDType(ValueType, AL); + if (IsOutput) + Diag(AL.getLoc(), diag::err_hlsl_semantic_output_not_supported) << AL; + Attribute = createSemanticAttr<HLSLSV_DispatchThreadIDAttr>(AL, Index); + } else if (SemanticName == "SV_GROUPINDEX") { + if (IsOutput) + Diag(AL.getLoc(), diag::err_hlsl_semantic_output_not_supported) << AL; + Attribute = createSemanticAttr<HLSLSV_GroupIndexAttr>(AL, Index); + } else if (SemanticName == "SV_GROUPTHREADID") { + diagnoseInputIDType(ValueType, AL); + if (IsOutput) + Diag(AL.getLoc(), diag::err_hlsl_semantic_output_not_supported) << AL; + Attribute = createSemanticAttr<HLSLSV_GroupThreadIDAttr>(AL, Index); + } else if (SemanticName == "SV_GROUPID") { + diagnoseInputIDType(ValueType, AL); + if (IsOutput) + Diag(AL.getLoc(), diag::err_hlsl_semantic_output_not_supported) << AL; + Attribute = createSemanticAttr<HLSLSV_GroupIDAttr>(AL, Index); + } else if (SemanticName == 
"SV_POSITION") { + const auto *VT = ValueType->getAs<VectorType>(); + if (!ValueType->hasFloatingRepresentation() || + (VT && VT->getNumElements() > 4)) + Diag(AL.getLoc(), diag::err_hlsl_attr_invalid_type) + << AL << "float/float1/float2/float3/float4"; + Attribute = createSemanticAttr<HLSLSV_PositionAttr>(AL, Index); + } else + Diag(AL.getLoc(), diag::err_hlsl_unknown_semantic) << AL; + + if (!Attribute) return; + D->addAttr(Attribute); +} - D->addAttr(::new (getASTContext()) HLSLSV_GroupIDAttr(getASTContext(), AL)); +void SemaHLSL::handleSemanticAttr(Decl *D, const ParsedAttr &AL) { + uint32_t IndexValue, ExplicitIndex; + SemaRef.checkUInt32Argument(AL, AL.getArgAsExpr(0), IndexValue); + SemaRef.checkUInt32Argument(AL, AL.getArgAsExpr(1), ExplicitIndex); + assert(IndexValue > 0 ? ExplicitIndex : true); + std::optional<unsigned> Index = + ExplicitIndex ? std::optional<unsigned>(IndexValue) : std::nullopt; + + if (AL.getAttrName()->getName().starts_with_insensitive("SV_")) + diagnoseSystemSemanticAttr(D, AL, Index); + else + Diag(AL.getLoc(), diag::err_hlsl_unknown_semantic) << AL; } void SemaHLSL::handlePackOffsetAttr(Decl *D, const ParsedAttr &AL) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 8e219e5..a3a25e4 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1710,9 +1710,13 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) { const HeaderSearchOptions &HSOpts = PP.getHeaderSearchInfo().getHeaderSearchOpts(); + SmallString<256> HSOpts_ModuleCachePath; + normalizeModuleCachePath(PP.getFileManager(), HSOpts.ModuleCachePath, + HSOpts_ModuleCachePath); + AddString(HSOpts.Sysroot, Record); AddString(HSOpts.ResourceDir, Record); - AddString(HSOpts.ModuleCachePath, Record); + AddString(HSOpts_ModuleCachePath, Record); AddString(HSOpts.ModuleUserBuildPath, Record); Record.push_back(HSOpts.DisableModuleHash); 
Record.push_back(HSOpts.ImplicitModuleMaps); diff --git a/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp index fe36e3b..79fd0bd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp @@ -18,11 +18,36 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/Support/FormatVariadic.h" using namespace clang; using namespace ento; +using llvm::formatv; -REGISTER_SET_WITH_PROGRAMSTATE(InitializedVALists, const MemRegion *) +namespace { +enum class VAListState { + Uninitialized, + Unknown, + Initialized, + Released, +}; + +constexpr llvm::StringLiteral StateNames[] = { + "uninitialized", "unknown", "initialized", "already released"}; +} // end anonymous namespace + +static StringRef describeState(const VAListState S) { + return StateNames[static_cast<int>(S)]; +} + +REGISTER_MAP_WITH_PROGRAMSTATE(VAListStateMap, const MemRegion *, VAListState) + +static VAListState getVAListState(ProgramStateRef State, const MemRegion *Reg) { + if (const VAListState *Res = State->get<VAListStateMap>(Reg)) + return *Res; + return Reg->getSymbolicBase() ? 
VAListState::Unknown + : VAListState::Uninitialized; +} namespace { typedef SmallVector<const MemRegion *, 2> RegionVector; @@ -48,7 +73,7 @@ public: private: const MemRegion *getVAListAsRegion(SVal SV, const Expr *VAExpr, - bool &IsSymbolic, CheckerContext &C) const; + CheckerContext &C) const; const ExplodedNode *getStartCallSite(const ExplodedNode *N, const MemRegion *Reg) const; @@ -57,8 +82,8 @@ private: void reportLeaked(const RegionVector &Leaked, StringRef Msg1, StringRef Msg2, CheckerContext &C, ExplodedNode *N) const; - void checkVAListStartCall(const CallEvent &Call, CheckerContext &C, - bool IsCopy) const; + void checkVAListStartCall(const CallEvent &Call, CheckerContext &C) const; + void checkVAListCopyCall(const CallEvent &Call, CheckerContext &C) const; void checkVAListEndCall(const CallEvent &Call, CheckerContext &C) const; class VAListBugVisitor : public BugReporterVisitor { @@ -118,41 +143,35 @@ const CallDescription VAListChecker::VaStart(CDM::CLibrary, void VAListChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { if (VaStart.matches(Call)) - checkVAListStartCall(Call, C, false); + checkVAListStartCall(Call, C); else if (VaCopy.matches(Call)) - checkVAListStartCall(Call, C, true); + checkVAListCopyCall(Call, C); else if (VaEnd.matches(Call)) checkVAListEndCall(Call, C); else { for (auto FuncInfo : VAListAccepters) { if (!FuncInfo.Func.matches(Call)) continue; - bool Symbolic; const MemRegion *VAList = getVAListAsRegion(Call.getArgSVal(FuncInfo.ParamIndex), - Call.getArgExpr(FuncInfo.ParamIndex), Symbolic, C); + Call.getArgExpr(FuncInfo.ParamIndex), C); if (!VAList) return; + VAListState S = getVAListState(C.getState(), VAList); - if (C.getState()->contains<InitializedVALists>(VAList)) - return; - - // We did not see va_start call, but the source of the region is unknown. - // Be conservative and assume the best. 
- if (Symbolic) + if (S == VAListState::Initialized || S == VAListState::Unknown) return; - SmallString<80> Errmsg("Function '"); - Errmsg += FuncInfo.Func.getFunctionName(); - Errmsg += "' is called with an uninitialized va_list argument"; - reportUninitializedAccess(VAList, Errmsg.c_str(), C); + std::string ErrMsg = + formatv("Function '{0}' is called with an {1} va_list argument", + FuncInfo.Func.getFunctionName(), describeState(S)); + reportUninitializedAccess(VAList, ErrMsg, C); break; } } } const MemRegion *VAListChecker::getVAListAsRegion(SVal SV, const Expr *E, - bool &IsSymbolic, CheckerContext &C) const { const MemRegion *Reg = SV.getAsRegion(); if (!Reg) @@ -168,7 +187,6 @@ const MemRegion *VAListChecker::getVAListAsRegion(SVal SV, const Expr *E, if (isa<ParmVarDecl>(DeclReg->getDecl())) Reg = C.getState()->getSVal(SV.castAs<Loc>()).getAsRegion(); } - IsSymbolic = Reg && Reg->getBaseRegion()->getAs<SymbolicRegion>(); // Some VarRegion based VA lists reach here as ElementRegions. const auto *EReg = dyn_cast_or_null<ElementRegion>(Reg); return (EReg && VAListModelledAsArray) ? 
EReg->getSuperRegion() : Reg; @@ -178,52 +196,53 @@ void VAListChecker::checkPreStmt(const VAArgExpr *VAA, CheckerContext &C) const { ProgramStateRef State = C.getState(); const Expr *ArgExpr = VAA->getSubExpr(); - SVal VAListSVal = C.getSVal(ArgExpr); - bool Symbolic; - const MemRegion *VAList = getVAListAsRegion(VAListSVal, ArgExpr, Symbolic, C); + const MemRegion *VAList = getVAListAsRegion(C.getSVal(ArgExpr), ArgExpr, C); if (!VAList) return; - if (Symbolic) + VAListState S = getVAListState(C.getState(), VAList); + if (S == VAListState::Initialized || S == VAListState::Unknown) return; - if (!State->contains<InitializedVALists>(VAList)) - reportUninitializedAccess( - VAList, "va_arg() is called on an uninitialized va_list", C); + + std::string ErrMsg = + formatv("va_arg() is called on an {0} va_list", describeState(S)); + reportUninitializedAccess(VAList, ErrMsg, C); } void VAListChecker::checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const { ProgramStateRef State = C.getState(); - InitializedVAListsTy Tracked = State->get<InitializedVALists>(); + VAListStateMapTy Tracked = State->get<VAListStateMap>(); RegionVector Leaked; - for (const MemRegion *Reg : Tracked) { + for (const auto &[Reg, S] : Tracked) { if (SR.isLiveRegion(Reg)) continue; - Leaked.push_back(Reg); - State = State->remove<InitializedVALists>(Reg); + if (S == VAListState::Initialized) + Leaked.push_back(Reg); + State = State->remove<VAListStateMap>(Reg); } - if (ExplodedNode *N = C.addTransition(State)) + if (ExplodedNode *N = C.addTransition(State)) { reportLeaked(Leaked, "Initialized va_list", " is leaked", C, N); + } } // This function traverses the exploded graph backwards and finds the node where -// the va_list is initialized. That node is used for uniquing the bug paths. -// It is not likely that there are several different va_lists that belongs to -// different stack frames, so that case is not yet handled. +// the va_list becomes initialized. 
That node is used for uniquing the bug +// paths. It is not likely that there are several different va_lists that +// belongs to different stack frames, so that case is not yet handled. const ExplodedNode * VAListChecker::getStartCallSite(const ExplodedNode *N, const MemRegion *Reg) const { const LocationContext *LeakContext = N->getLocationContext(); const ExplodedNode *StartCallNode = N; - bool FoundInitializedState = false; + bool SeenInitializedState = false; while (N) { - ProgramStateRef State = N->getState(); - if (!State->contains<InitializedVALists>(Reg)) { - if (FoundInitializedState) - break; - } else { - FoundInitializedState = true; + VAListState S = getVAListState(N->getState(), Reg); + if (S == VAListState::Initialized) { + SeenInitializedState = true; + } else if (SeenInitializedState) { + break; } const LocationContext *NContext = N->getLocationContext(); if (NContext == LeakContext || NContext->isParentOf(LeakContext)) @@ -274,71 +293,84 @@ void VAListChecker::reportLeaked(const RegionVector &Leaked, StringRef Msg1, } void VAListChecker::checkVAListStartCall(const CallEvent &Call, - CheckerContext &C, bool IsCopy) const { - bool Symbolic; - const MemRegion *VAList = - getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C); - if (!VAList) + CheckerContext &C) const { + const MemRegion *Arg = + getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C); + if (!Arg) return; ProgramStateRef State = C.getState(); + VAListState ArgState = getVAListState(State, Arg); - if (IsCopy) { - const MemRegion *Arg2 = - getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), Symbolic, C); - if (Arg2) { - if (VAList == Arg2) { - RegionVector Leaked{VAList}; - if (ExplodedNode *N = C.addTransition(State)) - reportLeaked(Leaked, "va_list", " is copied onto itself", C, N); - return; - } - if (!State->contains<InitializedVALists>(Arg2) && !Symbolic) { - if (State->contains<InitializedVALists>(VAList)) { - State = 
State->remove<InitializedVALists>(VAList); - RegionVector Leaked{VAList}; - if (ExplodedNode *N = C.addTransition(State)) - reportLeaked(Leaked, "Initialized va_list", - " is overwritten by an uninitialized one", C, N); - } else { - reportUninitializedAccess(Arg2, "Uninitialized va_list is copied", C); - } - return; - } - } - } - if (State->contains<InitializedVALists>(VAList)) { - RegionVector Leaked{VAList}; + if (ArgState == VAListState::Initialized) { + RegionVector Leaked{Arg}; if (ExplodedNode *N = C.addTransition(State)) reportLeaked(Leaked, "Initialized va_list", " is initialized again", C, N); return; } - State = State->add<InitializedVALists>(VAList); + State = State->set<VAListStateMap>(Arg, VAListState::Initialized); + C.addTransition(State); +} + +void VAListChecker::checkVAListCopyCall(const CallEvent &Call, + CheckerContext &C) const { + const MemRegion *Arg1 = + getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C); + const MemRegion *Arg2 = + getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), C); + if (!Arg1 || !Arg2) + return; + + ProgramStateRef State = C.getState(); + if (Arg1 == Arg2) { + RegionVector Leaked{Arg1}; + if (ExplodedNode *N = C.addTransition(State)) + reportLeaked(Leaked, "va_list", " is copied onto itself", C, N); + return; + } + VAListState State1 = getVAListState(State, Arg1); + VAListState State2 = getVAListState(State, Arg2); + // Update the ProgramState by copying the state of Arg2 to Arg1. + State = State->set<VAListStateMap>(Arg1, State2); + if (State1 == VAListState::Initialized) { + RegionVector Leaked{Arg1}; + std::string Msg2 = + formatv(" is overwritten by {0} {1} one", + (State2 == VAListState::Initialized) ? 
"another" : "an", + describeState(State2)); + if (ExplodedNode *N = C.addTransition(State)) + reportLeaked(Leaked, "Initialized va_list", Msg2, C, N); + return; + } + if (State2 != VAListState::Initialized && State2 != VAListState::Unknown) { + std::string Msg = formatv("{0} va_list is copied", describeState(State2)); + Msg[0] = toupper(Msg[0]); + reportUninitializedAccess(Arg2, Msg, C); + return; + } C.addTransition(State); } void VAListChecker::checkVAListEndCall(const CallEvent &Call, CheckerContext &C) const { - bool Symbolic; - const MemRegion *VAList = - getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C); - if (!VAList) + const MemRegion *Arg = + getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C); + if (!Arg) return; - // We did not see va_start call, but the source of the region is unknown. - // Be conservative and assume the best. - if (Symbolic) - return; + ProgramStateRef State = C.getState(); + VAListState ArgState = getVAListState(State, Arg); - if (!C.getState()->contains<InitializedVALists>(VAList)) { - reportUninitializedAccess( - VAList, "va_end() is called on an uninitialized va_list", C); + if (ArgState != VAListState::Unknown && + ArgState != VAListState::Initialized) { + std::string Msg = formatv("va_end() is called on an {0} va_list", + describeState(ArgState)); + reportUninitializedAccess(Arg, Msg, C); return; } - ProgramStateRef State = C.getState(); - State = State->remove<InitializedVALists>(VAList); + State = State->set<VAListStateMap>(Arg, VAListState::Released); C.addTransition(State); } @@ -351,13 +383,26 @@ PathDiagnosticPieceRef VAListChecker::VAListBugVisitor::VisitNode( if (!S) return nullptr; + VAListState After = getVAListState(State, Reg); + VAListState Before = getVAListState(StatePrev, Reg); + if (Before == After) + return nullptr; + StringRef Msg; - if (State->contains<InitializedVALists>(Reg) && - !StatePrev->contains<InitializedVALists>(Reg)) + switch (After) { + case VAListState::Uninitialized: 
+ Msg = "Copied uninitialized contents into the va_list"; + break; + case VAListState::Unknown: + Msg = "Copied unknown contents into the va_list"; + break; + case VAListState::Initialized: Msg = "Initialized va_list"; - else if (!State->contains<InitializedVALists>(Reg) && - StatePrev->contains<InitializedVALists>(Reg)) + break; + case VAListState::Released: Msg = "Ended va_list"; + break; + } if (Msg.empty()) return nullptr; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index 0d2294e..6f13d55 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -91,6 +91,13 @@ bool tryToFindPtrOrigin( continue; } if (auto *call = dyn_cast<CallExpr>(E)) { + if (auto *Callee = call->getCalleeDecl()) { + if (Callee->hasAttr<CFReturnsRetainedAttr>() || + Callee->hasAttr<NSReturnsRetainedAttr>()) { + return callback(E, true); + } + } + if (auto *memberCall = dyn_cast<CXXMemberCallExpr>(call)) { if (auto *decl = memberCall->getMethodDecl()) { std::optional<bool> IsGetterOfRefCt = isGetterOfSafePtr(decl); @@ -154,6 +161,24 @@ bool tryToFindPtrOrigin( Name == "NSClassFromString") return callback(E, true); } + + // Sometimes, canonical type erroneously turns Ref<T> into T. + // Workaround this problem by checking again if the original type was + // a SubstTemplateTypeParmType of a safe smart pointer type (e.g. Ref). 
+ if (auto *CalleeDecl = call->getCalleeDecl()) { + if (auto *FD = dyn_cast<FunctionDecl>(CalleeDecl)) { + auto RetType = FD->getReturnType(); + if (auto *Subst = dyn_cast<SubstTemplateTypeParmType>(RetType)) { + if (auto *SubstType = Subst->desugar().getTypePtr()) { + if (auto *RD = dyn_cast<RecordType>(SubstType)) { + if (auto *CXX = dyn_cast<CXXRecordDecl>(RD->getOriginalDecl())) + if (isSafePtr(CXX)) + return callback(E, true); + } + } + } + } + } } if (auto *ObjCMsgExpr = dyn_cast<ObjCMessageExpr>(E)) { if (auto *Method = ObjCMsgExpr->getMethodDecl()) { @@ -217,6 +242,16 @@ bool isASafeCallArg(const Expr *E) { return isa<CXXThisExpr>(E); } +bool isNullPtr(const clang::Expr *E) { + if (isa<CXXNullPtrLiteralExpr>(E) || isa<GNUNullExpr>(E)) + return true; + if (auto *Int = dyn_cast_or_null<IntegerLiteral>(E)) { + if (Int->getValue().isZero()) + return true; + } + return false; +} + bool isConstOwnerPtrMemberExpr(const clang::Expr *E) { if (auto *MCE = dyn_cast<CXXMemberCallExpr>(E)) { if (auto *Callee = MCE->getDirectCallee()) { @@ -275,7 +310,7 @@ public: bool VisitReturnStmt(const ReturnStmt *RS) { if (auto *RV = RS->getRetValue()) { RV = RV->IgnoreParenCasts(); - if (isa<CXXNullPtrLiteralExpr>(RV)) + if (isNullPtr(RV)) return true; return isConstOwnerPtrMemberExpr(RV); } diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h index 8302bbe..3a009d6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h @@ -66,6 +66,9 @@ bool tryToFindPtrOrigin( /// \returns Whether \p E is a safe call arugment. bool isASafeCallArg(const clang::Expr *E); +/// \returns true if E is nullptr or __null. +bool isNullPtr(const clang::Expr *E); + /// \returns true if E is a MemberExpr accessing a const smart pointer type. 
bool isConstOwnerPtrMemberExpr(const clang::Expr *E); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ForwardDeclChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ForwardDeclChecker.cpp index ec0c2c1..9deb184 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ForwardDeclChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ForwardDeclChecker.cpp @@ -272,7 +272,7 @@ public: ArgExpr = ArgExpr->IgnoreParenCasts(); } } - if (isa<CXXNullPtrLiteralExpr>(ArgExpr) || isa<IntegerLiteral>(ArgExpr) || + if (isNullPtr(ArgExpr) || isa<IntegerLiteral>(ArgExpr) || isa<CXXDefaultArgExpr>(ArgExpr)) return; if (auto *DRE = dyn_cast<DeclRefExpr>(ArgExpr)) { diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 884dbe9..56747d7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -666,6 +666,10 @@ public: return IsFunctionTrivial(Callee); } + bool VisitGCCAsmStmt(const GCCAsmStmt *AS) { + return AS->getAsmString() == "brk #0xc471"; + } + bool VisitSubstNonTypeTemplateParmExpr(const SubstNonTypeTemplateParmExpr *E) { // Non-type template paramter is compile time constant and trivial. diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp index 764e2c6..e80f174 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp @@ -217,13 +217,11 @@ public: [&](const clang::Expr *ArgOrigin, bool IsSafe) { if (IsSafe) return true; - if (isa<CXXNullPtrLiteralExpr>(ArgOrigin)) { - // foo(nullptr) + if (isNullPtr(ArgOrigin)) return true; - } if (isa<IntegerLiteral>(ArgOrigin)) { // FIXME: Check the value. 
- // foo(NULL) + // foo(123) return true; } if (isa<ObjCStringLiteral>(ArgOrigin)) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp index 27a9113..03eeb99 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp @@ -232,14 +232,11 @@ public: if (!Init) return nullptr; if (auto *Lambda = dyn_cast<LambdaExpr>(Init)) { + DeclRefExprsToIgnore.insert(DRE); updateIgnoreList(); return Lambda; } - TempExpr = dyn_cast<CXXBindTemporaryExpr>(Init->IgnoreParenCasts()); - if (!TempExpr) - return nullptr; - updateIgnoreList(); - return dyn_cast_or_null<LambdaExpr>(TempExpr->getSubExpr()); + return nullptr; } void checkCalleeLambda(CallExpr *CE) { diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp index 7cd86a6..f4f6e28 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp @@ -295,7 +295,7 @@ public: if (isa<CXXThisExpr>(InitArgOrigin)) return true; - if (isa<CXXNullPtrLiteralExpr>(InitArgOrigin)) + if (isNullPtr(InitArgOrigin)) return true; if (isa<IntegerLiteral>(InitArgOrigin)) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RetainPtrCtorAdoptChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RetainPtrCtorAdoptChecker.cpp index d74fec2..5c1b2d7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RetainPtrCtorAdoptChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RetainPtrCtorAdoptChecker.cpp @@ -177,7 +177,8 @@ public: CreateOrCopyFnCall.insert(Arg); // Avoid double reporting. 
return; } - if (Result == IsOwnedResult::Owned || Result == IsOwnedResult::Skip) { + if (Result == IsOwnedResult::Owned || Result == IsOwnedResult::Skip || + isNullPtr(Arg)) { CreateOrCopyFnCall.insert(Arg); return; } @@ -486,7 +487,7 @@ public: continue; } } - if (isa<CXXNullPtrLiteralExpr>(E)) + if (isNullPtr(E)) return IsOwnedResult::NotOwned; if (auto *DRE = dyn_cast<DeclRefExpr>(E)) { auto QT = DRE->getType(); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index c0b28d2..dee34e3 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -71,21 +71,30 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred, Bldr.takeNodes(Pred); assert(ThisRD); - SVal V = Call.getArgSVal(0); - const Expr *VExpr = Call.getArgExpr(0); - // If the value being copied is not unknown, load from its location to get - // an aggregate rvalue. - if (std::optional<Loc> L = V.getAs<Loc>()) - V = Pred->getState()->getSVal(*L); - else - assert(V.isUnknownOrUndef()); + if (!ThisRD->isEmpty()) { + SVal V = Call.getArgSVal(0); + const Expr *VExpr = Call.getArgExpr(0); - ExplodedNodeSet Tmp; - evalLocation(Tmp, CallExpr, VExpr, Pred, Pred->getState(), V, - /*isLoad=*/true); - for (ExplodedNode *N : Tmp) - evalBind(Dst, CallExpr, N, ThisVal, V, !AlwaysReturnsLValue); + // If the value being copied is not unknown, load from its location to get + // an aggregate rvalue. + if (std::optional<Loc> L = V.getAs<Loc>()) + V = Pred->getState()->getSVal(*L); + else + assert(V.isUnknownOrUndef()); + + ExplodedNodeSet Tmp; + evalLocation(Tmp, CallExpr, VExpr, Pred, Pred->getState(), V, + /*isLoad=*/true); + for (ExplodedNode *N : Tmp) + evalBind(Dst, CallExpr, N, ThisVal, V, !AlwaysReturnsLValue); + } else { + // We can't copy empty classes because of empty base class optimization. 
+ // In that case, copying the empty base class subobject would overwrite the + // object that it overlaps with - so let's not do that. + // See issue-157467.cpp for an example. + Dst.Add(Pred); + } PostStmt PS(CallExpr, LCtx); for (ExplodedNode *N : Dst) { diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 8f18533..8e9d6fe 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -43,10 +43,13 @@ using namespace ento; namespace { class BindingKey { public: - enum Kind { Default = 0x0, Direct = 0x1 }; -private: - enum { Symbolic = 0x2 }; + enum Kind { + Default = 0x0, + Direct = 0x1, + Symbolic = 0x2, + }; +private: llvm::PointerIntPair<const MemRegion *, 2> P; uint64_t Data; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index f32747d..0855e6d 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -449,9 +449,10 @@ public: // Use the dependency scanning optimized file system if requested to do so. 
if (DepFS) { - StringRef ModulesCachePath = - ScanInstance.getHeaderSearchOpts().ModuleCachePath; - + SmallString<256> ModulesCachePath; + normalizeModuleCachePath( + *FileMgr, ScanInstance.getHeaderSearchOpts().ModuleCachePath, + ModulesCachePath); DepFS->resetBypassedPathPrefix(); if (!ModulesCachePath.empty()) DepFS->setBypassedPathPrefix(ModulesCachePath); diff --git a/clang/test/APINotes/yaml-roundtrip.test b/clang/test/APINotes/yaml-roundtrip.test index f4fa9ce..355596e 100644 --- a/clang/test/APINotes/yaml-roundtrip.test +++ b/clang/test/APINotes/yaml-roundtrip.test @@ -1,6 +1,6 @@ RUN: apinotes-test %S/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes > %t.result RUN: not diff -u %S/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes %t.result | \ -RUN: tail -n +5 | head -n -1 | \ +RUN: tail -n +5 | \ RUN: FileCheck %s --implicit-check-not="{{^\+}}" --implicit-check-not="{{^\-}}" We expect only the nullability to be different as it is canonicalized during the @@ -16,3 +16,8 @@ CHECK: - Nullability: S CHECK-NEXT: + Nullability: Unspecified CHECK: - Nullability: Scalar CHECK-NEXT: + Nullability: Unspecified + +# The roundtrip will add document markers. It is hard to remove the last line of the +# file in a cross-platform manner, so just assert it here to avoid a test failure due +# to the implicit check not. 
+# CHECK: + diff --git a/clang/test/AST/ByteCode/builtins.c b/clang/test/AST/ByteCode/builtins.c new file mode 100644 index 0000000..a51260c --- /dev/null +++ b/clang/test/AST/ByteCode/builtins.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -fexperimental-new-constant-interpreter %s -verify +// RUN: %clang_cc1 %s -verify=ref + +// expected-no-diagnostics +// ref-no-diagnostics + +extern __SIZE_TYPE__ strlen(const char *); + +struct str_t { + char s1[sizeof("a")]; +}; +static const struct str_t str1 = {"a"}; +#define str ((const char *)&str1) +int structStrlen(void) { + if (strlen(str) == 1) + return 0; + return 1; +} + diff --git a/clang/test/AST/ByteCode/builtins.cpp b/clang/test/AST/ByteCode/builtins.cpp index 9b2b207..33b703c 100644 --- a/clang/test/AST/ByteCode/builtins.cpp +++ b/clang/test/AST/ByteCode/builtins.cpp @@ -36,3 +36,10 @@ void test_builtin_os_log(void *buf, int i, const char *data) { constexpr int len = __builtin_os_log_format_buffer_size("%d %{public}s %{private}.16P", i, data, data); static_assert(len > 0, "Expect len > 0"); } + +void addc_dummy() +{ + unsigned int a; + if (__builtin_addc (1, 42, 0, &a)) + __builtin_abort (); +} diff --git a/clang/test/AST/ByteCode/openmp.cpp b/clang/test/AST/ByteCode/openmp.cpp index 15f10a2..c7cccfd 100644 --- a/clang/test/AST/ByteCode/openmp.cpp +++ b/clang/test/AST/ByteCode/openmp.cpp @@ -17,12 +17,12 @@ extern int omp_get_thread_num(void); int test2() { int x = 0; - int device_result[N] = {0}; + int result[N] = {0}; - #pragma omp target parallel loop num_threads(strict: N) severity(warning) message("msg") + #pragma omp parallel loop num_threads(strict: N) severity(warning) message("msg") for (int i = 0; i < N; i++) { x = omp_get_thread_num(); - device_result[i] = i + x; + result[i] = i + x; } } diff --git a/clang/test/AST/ByteCode/references.cpp b/clang/test/AST/ByteCode/references.cpp index 36609b7..3da3996a 100644 --- a/clang/test/AST/ByteCode/references.cpp +++ b/clang/test/AST/ByteCode/references.cpp @@ -1,5 
+1,5 @@ // RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both %s -// RUN: %clang_cc1 -verify=ref,both %s +// RUN: %clang_cc1 -verify=ref,both %s constexpr int a = 10; @@ -178,3 +178,19 @@ namespace Params { static_assert(foo()); } + +namespace ReadFromNullBlockPtr { + struct S { + int *const &t; + }; + + void foo(int x) { + constexpr S s = {&x}; // both-error {{must be initialized by a constant expression}} \ + // both-note {{reference to temporary}} \ + // both-note {{created here}} \ + // ref-note {{declared here}} + static_assert(s.t == &x, ""); // both-error {{not an integral constant expression}} \ + // expected-note {{read of dereferenced null pointer}} \ + // ref-note {{initializer of 's' is not a constant expression}} + } +} diff --git a/clang/test/AST/HLSL/ByteAddressBuffers-AST.hlsl b/clang/test/AST/HLSL/ByteAddressBuffers-AST.hlsl index f2a3a74..8d2e36f 100644 --- a/clang/test/AST/HLSL/ByteAddressBuffers-AST.hlsl +++ b/clang/test/AST/HLSL/ByteAddressBuffers-AST.hlsl @@ -82,6 +82,62 @@ RESOURCE Buffer; // CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]' lvalue implicit this // CHECK-NEXT: AlwaysInlineAttr +// Static __createFromBinding method + +// CHECK: CXXMethodDecl {{.*}} __createFromBinding 'hlsl::[[RESOURCE]] (unsigned int, unsigned int, int, unsigned int, const char *)' static +// CHECK-NEXT: ParmVarDecl {{.*}} registerNo 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} spaceNo 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} range 'int' +// CHECK-NEXT: ParmVarDecl {{.*}} index 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} name 'const char *' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: DeclStmt +// CHECK-NEXT: VarDecl {{.*}} tmp 'hlsl::[[RESOURCE]]' +// CHECK-NEXT: BinaryOperator {{.*}} '__hlsl_resource_t {{.*}}]]' '=' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]' +// CHECK-NEXT: 
CallExpr {{.*}} '__hlsl_resource_t {{.*}}' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(...) noexcept' <BuiltinFnToFnPtr> +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_handlefrombinding' 'void (...) noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'registerNo' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'spaceNo' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int' ParmVar {{.*}} 'range' 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'const char *' ParmVar {{.*}} 'name' 'const char *' +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]' +// CHECK-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline + +// Static __createFromImplicitBinding method + +// CHECK: CXXMethodDecl {{.*}} __createFromImplicitBinding 'hlsl::[[RESOURCE]] (unsigned int, unsigned int, int, unsigned int, const char *)' static +// CHECK-NEXT: ParmVarDecl {{.*}} orderId 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} spaceNo 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} range 'int' +// CHECK-NEXT: ParmVarDecl {{.*}} index 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} name 'const char *' +// CHECK-NEXT: CompoundStmt {{.*}} +// CHECK-NEXT: DeclStmt {{.*}} +// CHECK-NEXT: VarDecl {{.*}} tmp 'hlsl::[[RESOURCE]]' +// CHECK-NEXT: BinaryOperator {{.*}} '__hlsl_resource_t {{.*}}]]' '=' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]' +// CHECK-NEXT: CallExpr {{.*}} '__hlsl_resource_t {{.*}}' +// CHECK-NEXT: 
ImplicitCastExpr {{.*}} 'void (*)(...) noexcept' <BuiltinFnToFnPtr> +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_handlefromimplicitbinding' 'void (...) noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'orderId' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'spaceNo' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int' ParmVar {{.*}} 'range' 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'const char *' ParmVar {{.*}} 'name' 'const char *' +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]' +// CHECK-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline + // Constructor from binding // CHECK: CXXConstructorDecl {{.*}} [[RESOURCE]] 'void (unsigned int, unsigned int, int, unsigned int, const char *)' inline @@ -130,5 +186,5 @@ RESOURCE Buffer; // CHECK-NEXT: DeclRefExpr {{.*}} 'const char *' ParmVar {{.*}} 'name' 'const char *' // CHECK-NEXT: AlwaysInlineAttr -// CHECK-NOSUBSCRIPT-NOT: CXXMethodDecl {{.*}} operator[] 'const element_type &(unsigned int) const' -// CHECK-NOSUBSCRIPT-NOT: CXXMethodDecl {{.*}} operator[] 'element_type &(unsigned int)' +// CHECK-NOSUBSCRIPT-NOT: CXXMethodDecl {{.*}} operator[] 'const char8_t &(unsigned int) const' +// CHECK-NOSUBSCRIPT-NOT: CXXMethodDecl {{.*}} operator[] 'char8_t &(unsigned int)' diff --git a/clang/test/AST/HLSL/RootSignature-Target-AST.hlsl b/clang/test/AST/HLSL/RootSignature-Target-AST.hlsl new file mode 100644 index 0000000..91441e3 --- /dev/null +++ b/clang/test/AST/HLSL/RootSignature-Target-AST.hlsl @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple 
dxil-pc-shadermodel6.0-rootsignature -ast-dump \ +// RUN: -hlsl-entry EntryRootSig -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-V1_1 + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-rootsignature -ast-dump \ +// RUN: -fdx-rootsignature-version=rootsig_1_0 \ +// RUN: -hlsl-entry EntryRootSig -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-V1_0 + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-rootsignature -ast-dump \ +// RUN: -D CmdRS='"UAV(u0)"'\ +// RUN: -hlsl-entry CmdRS -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CMD + +// CHECK: -HLSLRootSignatureDecl 0x{{.*}} {{.*}} implicit [[ENTRY_RS_DECL:__hlsl_rootsig_decl_\d*]] +// CHECK-V1_0-SAME: version: 1.0, +// CHECK-V1_1-SAME: version: 1.1, +// CHECK-SAME: RootElements{ +// CHECK-SAME: RootCBV(b0, +// CHECK-SAME: space = 0, visibility = All, +// CHECK-V1_0-SAME: flags = DataVolatile +// CHECK-V1_1-SAME: flags = DataStaticWhileSetAtExecute +// CHECK-SAME: ) +// CHECK-SAME: } +#define EntryRootSig "CBV(b0)" + +// CMD: -HLSLRootSignatureDecl 0x{{.*}} {{.*}} implicit [[CMD_RS_DECL:__hlsl_rootsig_decl_\d*]] +// CMD-SAME: version: 1.1, +// CMD-SAME: RootElements{ +// CMD-SAME: RootUAV(u0, space = 0, visibility = All, flags = DataVolatile) +// CMD-SAME: } diff --git a/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl b/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl index 23ed410..52a2c20 100644 --- a/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl +++ b/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl @@ -129,6 +129,62 @@ RESOURCE<float> Buffer; // CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue implicit this // CHECK-NEXT: AlwaysInlineAttr +// Static __createFromBinding method + +// CHECK: CXXMethodDecl {{.*}} __createFromBinding 'hlsl::[[RESOURCE]]<element_type> (unsigned int, unsigned int, int, unsigned int, const char *)' static +// CHECK-NEXT: ParmVarDecl {{.*}} registerNo 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} 
spaceNo 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} range 'int' +// CHECK-NEXT: ParmVarDecl {{.*}} index 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} name 'const char *' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: DeclStmt +// CHECK-NEXT: VarDecl {{.*}} tmp 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: BinaryOperator {{.*}} '__hlsl_resource_t {{.*}}]]' '=' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: CallExpr {{.*}} '__hlsl_resource_t {{.*}}' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(...) noexcept' <BuiltinFnToFnPtr> +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_handlefrombinding' 'void (...) noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'registerNo' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'spaceNo' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int' ParmVar {{.*}} 'range' 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'const char *' ParmVar {{.*}} 'name' 'const char *' +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline + +// Static __createFromImplicitBinding method + +// CHECK: CXXMethodDecl {{.*}} __createFromImplicitBinding 'hlsl::[[RESOURCE]]<element_type> (unsigned int, unsigned int, int, unsigned int, const char *)' static +// CHECK-NEXT: ParmVarDecl {{.*}} orderId 'unsigned int' +// 
CHECK-NEXT: ParmVarDecl {{.*}} spaceNo 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} range 'int' +// CHECK-NEXT: ParmVarDecl {{.*}} index 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} name 'const char *' +// CHECK-NEXT: CompoundStmt {{.*}} +// CHECK-NEXT: DeclStmt {{.*}} +// CHECK-NEXT: VarDecl {{.*}} tmp 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: BinaryOperator {{.*}} '__hlsl_resource_t {{.*}}]]' '=' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: CallExpr {{.*}} '__hlsl_resource_t {{.*}}' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(...) noexcept' <BuiltinFnToFnPtr> +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_handlefromimplicitbinding' 'void (...) noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'orderId' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'spaceNo' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int' ParmVar {{.*}} 'range' 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'const char *' ParmVar {{.*}} 'name' 'const char *' +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline + // Constructor from binding // CHECK: CXXConstructorDecl {{.*}} [[RESOURCE]]<element_type> 'void (unsigned int, unsigned int, int, unsigned int, const char *)' inline diff --git a/clang/test/AST/HLSL/TypedBuffers-AST.hlsl 
b/clang/test/AST/HLSL/TypedBuffers-AST.hlsl index 4e3cdea..95f5789 100644 --- a/clang/test/AST/HLSL/TypedBuffers-AST.hlsl +++ b/clang/test/AST/HLSL/TypedBuffers-AST.hlsl @@ -104,6 +104,62 @@ RESOURCE<float> Buffer; // CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue implicit this // CHECK-NEXT: AlwaysInlineAttr +// Static __createFromBinding method + +// CHECK: CXXMethodDecl {{.*}} __createFromBinding 'hlsl::[[RESOURCE]]<element_type> (unsigned int, unsigned int, int, unsigned int, const char *)' static +// CHECK-NEXT: ParmVarDecl {{.*}} registerNo 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} spaceNo 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} range 'int' +// CHECK-NEXT: ParmVarDecl {{.*}} index 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} name 'const char *' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: DeclStmt +// CHECK-NEXT: VarDecl {{.*}} tmp 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: BinaryOperator {{.*}} '__hlsl_resource_t {{.*}}]]' '=' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: CallExpr {{.*}} '__hlsl_resource_t {{.*}}' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(...) noexcept' <BuiltinFnToFnPtr> +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_handlefrombinding' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'registerNo' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'spaceNo' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int' ParmVar {{.*}} 'range' 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'const char *' ParmVar {{.*}} 'name' 'const char *' +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline + +// Static __createFromImplicitBinding method + +// CHECK: CXXMethodDecl {{.*}} __createFromImplicitBinding 'hlsl::[[RESOURCE]]<element_type> (unsigned int, unsigned int, int, unsigned int, const char *)' static +// CHECK-NEXT: ParmVarDecl {{.*}} orderId 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} spaceNo 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} range 'int' +// CHECK-NEXT: ParmVarDecl {{.*}} index 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} name 'const char *' +// CHECK-NEXT: CompoundStmt {{.*}} +// CHECK-NEXT: DeclStmt {{.*}} +// CHECK-NEXT: VarDecl {{.*}} tmp 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: BinaryOperator {{.*}} '__hlsl_resource_t {{.*}}]]' '=' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: CallExpr {{.*}} '__hlsl_resource_t {{.*}}' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(...) 
noexcept' <BuiltinFnToFnPtr> +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_handlefromimplicitbinding' 'void (...) noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t {{.*}}' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'orderId' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'spaceNo' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int' ParmVar {{.*}} 'range' 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'const char *' ParmVar {{.*}} 'name' 'const char *' +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue Var {{.*}} 'tmp' 'hlsl::[[RESOURCE]]<element_type>' +// CHECK-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline + // Constructor from binding // CHECK: CXXConstructorDecl {{.*}} [[RESOURCE]]<element_type> 'void (unsigned int, unsigned int, int, unsigned int, const char *)' inline diff --git a/clang/test/Analysis/Checkers/WebKit/call-args-checked-const-member.cpp b/clang/test/Analysis/Checkers/WebKit/call-args-checked-const-member.cpp index f709560..7959daf 100644 --- a/clang/test/Analysis/Checkers/WebKit/call-args-checked-const-member.cpp +++ b/clang/test/Analysis/Checkers/WebKit/call-args-checked-const-member.cpp @@ -71,12 +71,21 @@ public: return m_obj5.get(); } + CheckedObj* ensureObj6() { + if (!m_obj6) + const_cast<std::unique_ptr<CheckedObj>&>(m_obj6) = new CheckedObj; + if (m_obj6->next()) + return (CheckedObj *)0; + return m_obj6.get(); + } + private: const std::unique_ptr<CheckedObj> m_obj1; std::unique_ptr<CheckedObj> m_obj2; const std::unique_ptr<CheckedObj> m_obj3; const std::unique_ptr<CheckedObj> m_obj4; const std::unique_ptr<CheckedObj> m_obj5; + const 
std::unique_ptr<CheckedObj> m_obj6; }; void Foo::bar() { @@ -87,6 +96,7 @@ void Foo::bar() { badEnsureObj4().method(); // expected-warning@-1{{Call argument for 'this' parameter is unchecked and unsafe}} ensureObj5()->method(); + ensureObj6()->method(); } } // namespace call_args_const_unique_ptr diff --git a/clang/test/Analysis/Checkers/WebKit/retain-ptr-ctor-adopt-use.mm b/clang/test/Analysis/Checkers/WebKit/retain-ptr-ctor-adopt-use.mm index 83c87b1..7699017 100644 --- a/clang/test/Analysis/Checkers/WebKit/retain-ptr-ctor-adopt-use.mm +++ b/clang/test/Analysis/Checkers/WebKit/retain-ptr-ctor-adopt-use.mm @@ -13,6 +13,8 @@ void basic_correct() { auto ns4 = adoptNS([ns3 mutableCopy]); auto ns5 = adoptNS([ns3 copyWithValue:3]); auto ns6 = retainPtr([ns3 next]); + auto ns7 = retainPtr((SomeObj *)0); + auto ns8 = adoptNS(nil); CFMutableArrayRef cf1 = adoptCF(CFArrayCreateMutable(kCFAllocatorDefault, 10)); auto cf2 = adoptCF(SecTaskCreateFromSelf(kCFAllocatorDefault)); auto cf3 = adoptCF(checked_cf_cast<CFArrayRef>(CFCopyArray(cf1))); @@ -111,6 +113,10 @@ RetainPtr<CVPixelBufferRef> cf_out_argument() { return adoptCF(rawBuffer); } +RetainPtr<SomeObj> return_nil() { + return nil; +} + RetainPtr<SomeObj> return_nullptr() { return nullptr; } diff --git a/clang/test/Analysis/Checkers/WebKit/template-wrapper-call-arg.cpp b/clang/test/Analysis/Checkers/WebKit/template-wrapper-call-arg.cpp new file mode 100644 index 0000000..b0ff210 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/template-wrapper-call-arg.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s +// expected-no-diagnostics + +#include "mock-types.h" + +struct Obj { + void ref() const; + void deref() const; + + void someFunction(); +}; + +template<typename T> class Wrapper { +public: + T obj(); +}; + +static void foo(Wrapper<Ref<Obj>>&& wrapper) +{ + wrapper.obj()->someFunction(); +} diff --git 
a/clang/test/Analysis/Checkers/WebKit/trivial-code-check-asm-brk.cpp b/clang/test/Analysis/Checkers/WebKit/trivial-code-check-asm-brk.cpp new file mode 100644 index 0000000..de98c77 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/trivial-code-check-asm-brk.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_analyze_cc1 -triple arm-darwin -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s +// expected-no-diagnostics + +void crash() +{ + __asm__ volatile ("brk #0xc471"); + __builtin_unreachable(); +} + +class SomeObj { +public: + void ref(); + void deref(); + + void someWork() { crash(); } +}; + +SomeObj* provide(); + +void doSomeWork() { + provide()->someWork(); +} diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp index 3079f8e..0b8af0d 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp @@ -448,4 +448,27 @@ void ranges_for_each(RefCountable* obj) { obj->method(); ++(*static_cast<unsigned*>(item)); }); -}
\ No newline at end of file +} + +class RefCountedObj { +public: + void ref(); + void deref(); + + void call() const; + void callLambda([[clang::noescape]] const WTF::Function<void ()>& callback) const; + void doSomeWork() const; +}; + +void RefCountedObj::callLambda([[clang::noescape]] const WTF::Function<void ()>& callback) const +{ + callback(); +} + +void RefCountedObj::call() const +{ + auto lambda = [&] { + doSomeWork(); + }; + callLambda(lambda); +} diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp index 0540ed9..3364637 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp @@ -121,6 +121,7 @@ void foo8(RefCountable* obj) { RefCountable *bar = foo->trivial() ? foo.get() : nullptr; // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} foo = nullptr; + foo = (RefCountable *)0; bar->method(); } } diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm index c69113c..f39822e 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm @@ -438,6 +438,32 @@ void use_const_local() { } // namespace const_global +namespace ns_retained_return_value { + +NSString *provideNS() NS_RETURNS_RETAINED; +CFDictionaryRef provideCF() CF_RETURNS_RETAINED; +void consumeNS(NSString *); +void consumeCF(CFDictionaryRef); + +void foo() { + consumeNS(provideNS()); + consumeCF(provideCF()); +} + +struct Base { + NSString *provideStr() NS_RETURNS_RETAINED; +}; + +struct Derived : Base { + void consumeStr(NSString *); + + void foo() { + consumeStr(provideStr()); + } +}; + +} // namespace ns_retained_return_value + @interface TestObject : NSObject - (void)doWork:(NSString *)msg, ...; - (void)doWorkOnSelf; 
@@ -456,6 +482,8 @@ void use_const_local() { // expected-warning@-1{{Call argument is unretained and unsafe}} // expected-warning@-2{{Call argument is unretained and unsafe}} [self doWork:@"hello", RetainPtr<SomeObj> { provide() }.get(), RetainPtr<CFMutableArrayRef> { provide_cf() }.get()]; + [self doWork:__null]; + [self doWork:nil]; } - (SomeObj *)getSomeObj { diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm index 10f7c9a..0ad8f70 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm @@ -408,6 +408,21 @@ void use_const_local() { } // namespace const_global +namespace ns_retained_return_value { + +NSString *provideNS() NS_RETURNS_RETAINED; +CFDictionaryRef provideCF() CF_RETURNS_RETAINED; +void consumeNS(NSString *); +void consumeCF(CFDictionaryRef); + +unsigned foo() { + auto *string = provideNS(); + auto *dictionary = provideCF(); + return string.length + CFDictionaryGetCount(dictionary); +} + +} // namespace ns_retained_return_value + bool doMoreWorkOpaque(OtherObj*); SomeObj* provide(); diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp index 45c8ca4..940ff9b 100644 --- a/clang/test/Analysis/ctor-trivial-copy.cpp +++ b/clang/test/Analysis/ctor-trivial-copy.cpp @@ -46,15 +46,10 @@ void _01_empty_structs() { empty Empty = conjure<empty>(); empty Empty2 = Empty; empty Empty3 = Empty2; - // All of these should refer to the exact same symbol, because all of - // these trivial copies refer to the original conjured value. 
- // There were Unknown before: - clang_analyzer_denote(Empty, "$Empty"); - clang_analyzer_express(Empty); // expected-warning {{$Empty}} - clang_analyzer_express(Empty2); // expected-warning {{$Empty}} - clang_analyzer_express(Empty3); // expected-warning {{$Empty}} - // We should have the same Conjured symbol for "Empty", "Empty2" and "Empty3". + // We only have binding for the original Empty object, because copying empty + // objects is a no-op in the performTrivialCopy. This is fine, because empty + // objects don't have any data members that could be accessed anyway. clang_analyzer_printState(); // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [ // CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ @@ -65,12 +60,6 @@ void _01_empty_structs() { // CHECK-NEXT: ]}, // CHECK-NEXT: { "cluster": "Empty", "pointer": "0x{{[0-9a-f]+}}", "items": [ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" } - // CHECK-NEXT: ]}, - // CHECK-NEXT: { "cluster": "Empty2", "pointer": "0x{{[0-9a-f]+}}", "items": [ - // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" } - // CHECK-NEXT: ]}, - // CHECK-NEXT: { "cluster": "Empty3", "pointer": "0x{{[0-9a-f]+}}", "items": [ - // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" } // CHECK-NEXT: ]} // CHECK-NEXT: ]}, diff --git a/clang/test/Analysis/issue-157467.cpp b/clang/test/Analysis/issue-157467.cpp new file mode 100644 index 0000000..8281ea1 --- /dev/null +++ b/clang/test/Analysis/issue-157467.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s +// expected-no-diagnostics + +template <class T, int Idx, bool CanBeEmptyBase = __is_empty(T) && (!__is_final(T))> +struct compressed_pair_elem { + explicit compressed_pair_elem(T u) : value(u) {} + T value; +}; + +template <class T, int Idx> +struct compressed_pair_elem<T, Idx, 
/*CanBeEmptyBase=*/true> : T { + explicit compressed_pair_elem(T u) : T(u) {} +}; + +template <class T1, class T2, class Base1 = compressed_pair_elem<T1, 0>, class Base2 = compressed_pair_elem<T2, 1>> +struct compressed_pair : Base1, Base2 { + explicit compressed_pair(T1 t1, T2 t2) : Base1(t1), Base2(t2) {} +}; + +// empty deleter object +template <class T> +struct default_delete { + void operator()(T* p) { + delete p; + } +}; + +template <class T, class Deleter = default_delete<T> > +struct some_unique_ptr { + // compressed_pair will employ the empty base class optimization, thus overlapping + // the `int*` and the empty `Deleter` object, clobbering the pointer. + compressed_pair<int*, Deleter> ptr; + some_unique_ptr(int* p, Deleter d) : ptr(p, d) {} + ~some_unique_ptr(); +}; + +void entry_point() { + some_unique_ptr<int, default_delete<int> > u3(new int(12), default_delete<int>()); +} diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp index c080313..fc7c373 100644 --- a/clang/test/Analysis/taint-generic.cpp +++ b/clang/test/Analysis/taint-generic.cpp @@ -153,8 +153,9 @@ void top() { int Int = mySource1<int>(); clang_analyzer_isTainted(Int); // expected-warning {{YES}} + // It's fine to not propagate taint to empty classes, since they don't have any data members. Empty E = mySource1<Empty>(); - clang_analyzer_isTainted(E); // expected-warning {{YES}} + clang_analyzer_isTainted(E); // expected-warning {{NO}} Aggr A = mySource1<Aggr>(); clang_analyzer_isTainted(A); // expected-warning {{YES}} diff --git a/clang/test/Analysis/valist-uninitialized-no-undef.c b/clang/test/Analysis/valist-uninitialized-no-undef.c index b8add29..da007e6 100644 --- a/clang/test/Analysis/valist-uninitialized-no-undef.c +++ b/clang/test/Analysis/valist-uninitialized-no-undef.c @@ -37,7 +37,7 @@ void call_vsprintf_bad(char *buffer, ...) 
{ va_list va; va_start(va, buffer); // expected-note{{Initialized va_list}} va_end(va); // expected-note{{Ended va_list}} - vsprintf(buffer, "%s %d %d %lf %03d", va); // expected-warning{{Function 'vsprintf' is called with an uninitialized va_list argument}} - // expected-note@-1{{Function 'vsprintf' is called with an uninitialized va_list argument}} + vsprintf(buffer, "%s %d %d %lf %03d", va); // expected-warning{{Function 'vsprintf' is called with an already released va_list argument}} + // expected-note@-1{{Function 'vsprintf' is called with an already released va_list argument}} } diff --git a/clang/test/Analysis/valist-uninitialized.c b/clang/test/Analysis/valist-uninitialized.c index 689fc95..f28f928 100644 --- a/clang/test/Analysis/valist-uninitialized.c +++ b/clang/test/Analysis/valist-uninitialized.c @@ -23,8 +23,8 @@ int f2(int fst, ...) { va_list va; va_start(va, fst); // expected-note{{Initialized va_list}} va_end(va); // expected-note{{Ended va_list}} - return va_arg(va, int); // expected-warning{{va_arg() is called on an uninitialized va_list}} - // expected-note@-1{{va_arg() is called on an uninitialized va_list}} + return va_arg(va, int); // expected-warning{{va_arg() is called on an already released va_list}} + // expected-note@-1{{va_arg() is called on an already released va_list}} } void f3(int fst, ...) { @@ -60,8 +60,8 @@ void f8(int *fst, ...) { va_list *y = &x; va_start(*y,fst); // expected-note{{Initialized va_list}} va_end(x); // expected-note{{Ended va_list}} - (void)va_arg(*y, int); //expected-warning{{va_arg() is called on an uninitialized va_list}} - // expected-note@-1{{va_arg() is called on an uninitialized va_list}} + (void)va_arg(*y, int); //expected-warning{{va_arg() is called on an already released va_list}} + // expected-note@-1{{va_arg() is called on an already released va_list}} } void reinitOk(int *fst, ...) { @@ -82,8 +82,8 @@ void reinit3(int *fst, ...) 
{ va_start(va, fst); // expected-note{{Initialized va_list}} (void)va_arg(va, int); va_end(va); // expected-note{{Ended va_list}} - (void)va_arg(va, int); //expected-warning{{va_arg() is called on an uninitialized va_list}} - // expected-note@-1{{va_arg() is called on an uninitialized va_list}} + (void)va_arg(va, int); //expected-warning{{va_arg() is called on an already released va_list}} + // expected-note@-1{{va_arg() is called on an already released va_list}} } void copyUnint(int fst, ...) { @@ -102,8 +102,8 @@ void g2(int fst, ...) { va_list va; va_start(va, fst); // expected-note{{Initialized va_list}} va_end(va); // expected-note{{Ended va_list}} - va_end(va); // expected-warning{{va_end() is called on an uninitialized va_list}} - // expected-note@-1{{va_end() is called on an uninitialized va_list}} + va_end(va); // expected-warning{{va_end() is called on an already released va_list}} + // expected-note@-1{{va_end() is called on an already released va_list}} } void is_sink(int fst, ...) { @@ -151,3 +151,16 @@ void va_copy_test(va_list arg) { va_copy(dst, arg); va_end(dst); } + +void all_state_changes(va_list unknown, int fst, ...) 
{ + va_list va, va2; + va_start(va, fst); // expected-note{{Initialized va_list}} + va_copy(va, unknown); // expected-note{{Copied unknown contents into the va_list}} + va_end(va); // expected-note{{Ended va_list}} + va_start(va, fst); // expected-note{{Initialized va_list}} + va_copy(va, va2); // expected-note{{Copied uninitialized contents into the va_list}} + va_start(va, fst); // expected-note{{Initialized va_list}} + va_end(va); // expected-note{{Ended va_list}} + va_end(va); // expected-warning{{va_end() is called on an already released va_list}} + // expected-note@-1{{va_end() is called on an already released va_list}} +} diff --git a/clang/test/Analysis/valist-unterminated.c b/clang/test/Analysis/valist-unterminated.c index cc89268..93e20740 100644 --- a/clang/test/Analysis/valist-unterminated.c +++ b/clang/test/Analysis/valist-unterminated.c @@ -97,12 +97,28 @@ void copyOverwrite(int fst, ...) { // expected-note@-1{{Initialized va_list 'va' is overwritten by an uninitialized one}} } +void copyOverwriteUnknown(va_list other, int fst, ...) { + va_list va; + va_start(va, fst); // expected-note{{Initialized va_list}} + va_copy(va, other); // expected-warning{{Initialized va_list 'va' is overwritten by an unknown one}} + // expected-note@-1{{Initialized va_list 'va' is overwritten by an unknown one}} +} + +void copyOverwriteReleased(int fst, ...) { + va_list va, va2; + va_start(va2, fst); + va_end(va2); + va_start(va, fst); // expected-note{{Initialized va_list}} + va_copy(va, va2); // expected-warning{{Initialized va_list 'va' is overwritten by an already released one}} + // expected-note@-1{{Initialized va_list 'va' is overwritten by an already released one}} +} + void recopy(int fst, ...) 
{ va_list va, va2; va_start(va, fst); va_copy(va2, va); // expected-note{{Initialized va_list}} - va_copy(va2, va); // expected-warning{{Initialized va_list 'va2' is initialized again}} - // expected-note@-1{{Initialized va_list 'va2' is initialized again}} + va_copy(va2, va); // expected-warning{{Initialized va_list 'va2' is overwritten by another initialized one}} + // expected-note@-1{{Initialized va_list 'va2' is overwritten by another initialized one}} va_end(va); va_end(va2); } diff --git a/clang/test/CIR/CodeGen/builtin_call.cpp b/clang/test/CIR/CodeGen/builtin_call.cpp index 09be793..853d894 100644 --- a/clang/test/CIR/CodeGen/builtin_call.cpp +++ b/clang/test/CIR/CodeGen/builtin_call.cpp @@ -211,6 +211,10 @@ void unreachable() { // LLVM: unreachable // LLVM: } +// OGCG-LABEL: @_Z11unreachablev +// OGCG: unreachable +// OGCG: } + void f1(); void unreachable2() { __builtin_unreachable(); @@ -229,6 +233,9 @@ void unreachable2() { // LLVM-NEXT: call void @_Z2f1v() // LLVM: } +// OGCG-LABEL: @_Z12unreachable2v +// OGCG: unreachable + void trap() { __builtin_trap(); } @@ -241,6 +248,10 @@ void trap() { // LLVM: call void @llvm.trap() // LLVM: } +// OGCG-LABEL: @_Z4trapv +// OGCG: call void @llvm.trap() +// OGCG: } + void trap2() { __builtin_trap(); f1(); @@ -258,3 +269,40 @@ void trap2() { // LLVM: {{.+}}: // LLVM-NEXT: call void @_Z2f1v() // LLVM: } + +// OGCG-LABEL: define{{.*}} void @_Z5trap2v +// OGCG: call void @llvm.trap() +// OGCG-NEXT: call void @_Z2f1v() +// OGCG: ret void +// OGCG: } + +void *test_alloca(unsigned long n) { + return __builtin_alloca(n); +} + +// CIR-LABEL: @_Z11test_allocam( +// CIR: %{{.+}} = cir.alloca !u8i, !cir.ptr<!u8i>, %{{.+}} : !u64i, ["bi_alloca"] + +// LLVM-LABEL: @_Z11test_allocam( +// LLVM: alloca i8, i64 %{{.+}} + +// OGCG-LABEL: @_Z11test_allocam( +// OGCG: alloca i8, i64 %{{.+}} + +bool test_multiple_allocas(unsigned long n) { + void *a = __builtin_alloca(n); + void *b = __builtin_alloca(n); + return a != b; +} + +// 
CIR-LABEL: @_Z21test_multiple_allocasm( +// CIR: %{{.+}} = cir.alloca !u8i, !cir.ptr<!u8i>, %{{.+}} : !u64i, ["bi_alloca"] +// CIR: %{{.+}} = cir.alloca !u8i, !cir.ptr<!u8i>, %{{.+}} : !u64i, ["bi_alloca"] + +// LLVM-LABEL: @_Z21test_multiple_allocasm( +// LLVM: alloca i8, i64 %{{.+}} +// LLVM: alloca i8, i64 %{{.+}} + +// OGCG-LABEL: @_Z21test_multiple_allocasm( +// OGCG: alloca i8, i64 %{{.+}} +// OGCG: alloca i8, i64 %{{.+}} diff --git a/clang/test/CIR/CodeGen/builtins-elementwise.c b/clang/test/CIR/CodeGen/builtins-elementwise.c index 1898f56..e3460f0 100644 --- a/clang/test/CIR/CodeGen/builtins-elementwise.c +++ b/clang/test/CIR/CodeGen/builtins-elementwise.c @@ -36,3 +36,56 @@ void test_builtin_elementwise_acos(float f, double d, vfloat4 vf4, vd4 = __builtin_elementwise_acos(vd4); } +void test_builtin_elementwise_asin(float f, double d, vfloat4 vf4, + vdouble4 vd4) { + // CIR-LABEL: test_builtin_elementwise_asin + // LLVM-LABEL: test_builtin_elementwise_asin + // OGCG-LABEL: test_builtin_elementwise_asin + + // CIR: %{{.*}} = cir.asin %{{.*}} : !cir.float + // LLVM: %{{.*}} = call float @llvm.asin.f32(float %{{.*}}) + // OGCG: %{{.*}} = call float @llvm.asin.f32(float %{{.*}}) + f = __builtin_elementwise_asin(f); + + // CIR: %{{.*}} = cir.asin %{{.*}} : !cir.double + // LLVM: %{{.*}} = call double @llvm.asin.f64(double %{{.*}}) + // OGCG: %{{.*}} = call double @llvm.asin.f64(double %{{.*}}) + d = __builtin_elementwise_asin(d); + + // CIR: %{{.*}} = cir.asin %{{.*}} : !cir.vector<4 x !cir.float> + // LLVM: %{{.*}} = call <4 x float> @llvm.asin.v4f32(<4 x float> %{{.*}}) + // OGCG: %{{.*}} = call <4 x float> @llvm.asin.v4f32(<4 x float> %{{.*}}) + vf4 = __builtin_elementwise_asin(vf4); + + // CIR: %{{.*}} = cir.asin %{{.*}} : !cir.vector<4 x !cir.double> + // LLVM: %{{.*}} = call <4 x double> @llvm.asin.v4f64(<4 x double> %{{.*}}) + // OGCG: %{{.*}} = call <4 x double> @llvm.asin.v4f64(<4 x double> %{{.*}}) + vd4 = __builtin_elementwise_asin(vd4); +} + +void 
test_builtin_elementwise_atan(float f, double d, vfloat4 vf4, + vdouble4 vd4) { + // CIR-LABEL: test_builtin_elementwise_atan + // LLVM-LABEL: test_builtin_elementwise_atan + // OGCG-LABEL: test_builtin_elementwise_atan + + // CIR: %{{.*}} = cir.atan %{{.*}} : !cir.float + // LLVM: %{{.*}} = call float @llvm.atan.f32(float %{{.*}}) + // OGCG: %{{.*}} = call float @llvm.atan.f32(float %{{.*}}) + f = __builtin_elementwise_atan(f); + + // CIR: %{{.*}} = cir.atan %{{.*}} : !cir.double + // LLVM: %{{.*}} = call double @llvm.atan.f64(double %{{.*}}) + // OGCG: %{{.*}} = call double @llvm.atan.f64(double %{{.*}}) + d = __builtin_elementwise_atan(d); + + // CIR: %{{.*}} = cir.atan %{{.*}} : !cir.vector<4 x !cir.float> + // LLVM: %{{.*}} = call <4 x float> @llvm.atan.v4f32(<4 x float> %{{.*}}) + // OGCG: %{{.*}} = call <4 x float> @llvm.atan.v4f32(<4 x float> %{{.*}}) + vf4 = __builtin_elementwise_atan(vf4); + + // CIR: %{{.*}} = cir.atan %{{.*}} : !cir.vector<4 x !cir.double> + // LLVM: %{{.*}} = call <4 x double> @llvm.atan.v4f64(<4 x double> %{{.*}}) + // OGCG: %{{.*}} = call <4 x double> @llvm.atan.v4f64(<4 x double> %{{.*}}) + vd4 = __builtin_elementwise_atan(vd4); +} diff --git a/clang/test/CIR/CodeGen/nrvo.cpp b/clang/test/CIR/CodeGen/nrvo.cpp new file mode 100644 index 0000000..72c39d7 --- /dev/null +++ b/clang/test/CIR/CodeGen/nrvo.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fno-elide-constructors -fclangir -emit-cir %s -o %t-noelide.cir +// RUN: FileCheck --input-file=%t-noelide.cir %s --check-prefix=CIR-NOELIDE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s 
--check-prefix=OGCG + +// There are no LLVM and OGCG tests with -fno-elide-constructors because the +// lowering isn't of interest for this test. We just need to see that the +// copy constructor is elided without -fno-elide-constructors but not with it. + +struct S { + S(); + int a; + int b; +}; + +struct S f1() { + S s; + return s; +} + +// CIR: cir.func{{.*}} @_Z2f1v() -> !rec_S { +// CIR-NEXT: %[[RETVAL:.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["__retval", init] +// CIR-NEXT: cir.call @_ZN1SC1Ev(%[[RETVAL]]) : (!cir.ptr<!rec_S>) -> () +// CIR-NEXT: %[[RET:.*]] = cir.load %[[RETVAL]] : !cir.ptr<!rec_S>, !rec_S +// CIR-NEXT: cir.return %[[RET]] + +// CIR-NOELIDE: cir.func{{.*}} @_Z2f1v() -> !rec_S { +// CIR-NOELIDE-NEXT: %[[RETVAL:.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["__retval"] +// CIR-NOELIDE-NEXT: %[[S:.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s", init] +// CIR-NOELIDE-NEXT: cir.call @_ZN1SC1Ev(%[[S]]) : (!cir.ptr<!rec_S>) -> () +// CIR-NOELIDE-NEXT: cir.call @_ZN1SC1EOS_(%[[RETVAL]], %[[S]]){{.*}} : (!cir.ptr<!rec_S>, !cir.ptr<!rec_S>) -> () +// CIR-NOELIDE-NEXT: %[[RET:.*]] = cir.load %[[RETVAL]] : !cir.ptr<!rec_S>, !rec_S +// CIR-NOELIDE-NEXT: cir.return %[[RET]] + +// FIXME: Update this when calling convention lowering is implemented.
+// LLVM: define{{.*}} %struct.S @_Z2f1v() +// LLVM-NEXT: %[[RETVAL:.*]] = alloca %struct.S +// LLVM-NEXT: call void @_ZN1SC1Ev(ptr %[[RETVAL]]) +// LLVM-NEXT: %[[RET:.*]] = load %struct.S, ptr %[[RETVAL]] +// LLVM-NEXT: ret %struct.S %[[RET]] + +// OGCG: define{{.*}} i64 @_Z2f1v() +// OGCG-NEXT: entry: +// OGCG-NEXT: %[[RETVAL:.*]] = alloca %struct.S +// OGCG-NEXT: call void @_ZN1SC1Ev(ptr {{.*}} %[[RETVAL]]) +// OGCG-NEXT: %[[RET:.*]] = load i64, ptr %[[RETVAL]] +// OGCG-NEXT: ret i64 %[[RET]] diff --git a/clang/test/CIR/IR/alloca.cir b/clang/test/CIR/IR/alloca.cir new file mode 100644 index 0000000..12f7e6a --- /dev/null +++ b/clang/test/CIR/IR/alloca.cir @@ -0,0 +1,32 @@ + +// RUN: cir-opt %s | FileCheck %s + +!u64i = !cir.int<u, 64> +!u8i = !cir.int<u, 8> +!void = !cir.void +module { + cir.func dso_local @_Z11test_allocam(%arg0: !u64i) -> !cir.ptr<!void> { + %0 = cir.alloca !u64i, !cir.ptr<!u64i>, ["n", init] {alignment = 8 : i64} + %1 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["__retval"] {alignment = 8 : i64} + cir.store %arg0, %0 : !u64i, !cir.ptr<!u64i> + %2 = cir.load align(8) %0 : !cir.ptr<!u64i>, !u64i + // Dynamically sized alloca + %3 = cir.alloca !u8i, !cir.ptr<!u8i>, %2 : !u64i, ["bi_alloca"] {alignment = 16 : i64} + %4 = cir.cast(bitcast, %3 : !cir.ptr<!u8i>), !cir.ptr<!void> + cir.store %4, %1 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>> + %5 = cir.load %1 : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void> + cir.return %5 : !cir.ptr<!void> + } + + // CHECK: cir.func dso_local @_Z11test_allocam(%arg0: !u64i) -> !cir.ptr<!void> { + // CHECK: %0 = cir.alloca !u64i, !cir.ptr<!u64i>, ["n", init] {alignment = 8 : i64} + // CHECK: %1 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["__retval"] {alignment = 8 : i64} + // CHECK: cir.store %arg0, %0 : !u64i, !cir.ptr<!u64i> + // CHECK: %2 = cir.load align(8) %0 : !cir.ptr<!u64i>, !u64i + // CHECK: %3 = cir.alloca !u8i, !cir.ptr<!u8i>, %2 : !u64i, ["bi_alloca"] {alignment = 16 : i64} + // 
CHECK: %4 = cir.cast(bitcast, %3 : !cir.ptr<!u8i>), !cir.ptr<!void> + // CHECK: cir.store %4, %1 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>> + // CHECK: %5 = cir.load %1 : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void> + // CHECK: cir.return %5 : !cir.ptr<!void> + // CHECK: } +} diff --git a/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c b/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c index fb788c5..85f5f1a 100644 --- a/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c +++ b/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c @@ -3,8 +3,7 @@ // module below does not transitively import Mod via a symlink, so it should not // see the symlinked path. -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // RUN: rm -rf %t // RUN: split-file %s %t diff --git a/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c b/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c index 1929b5b..eb49ab9 100644 --- a/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c +++ b/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c @@ -5,8 +5,7 @@ // RUN: rm -rf %t // RUN: split-file %s %t -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // RUN: mkdir -p %t/frameworks-symlink // RUN: ln -s %t/frameworks/FW.framework %t/frameworks-symlink/FW.framework diff --git a/clang/test/ClangScanDeps/modules-symlink-dir.c b/clang/test/ClangScanDeps/modules-symlink-dir.c index cf4a099..9946b57 100644 --- a/clang/test/ClangScanDeps/modules-symlink-dir.c +++ b/clang/test/ClangScanDeps/modules-symlink-dir.c @@ -1,5 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // Check that we canonicalize the module map path without changing the module // directory, which would break header lookup. 
diff --git a/clang/test/ClangScanDeps/subframework_header_dir_symlink.m b/clang/test/ClangScanDeps/subframework_header_dir_symlink.m index 801a392..66ff9df 100644 --- a/clang/test/ClangScanDeps/subframework_header_dir_symlink.m +++ b/clang/test/ClangScanDeps/subframework_header_dir_symlink.m @@ -1,5 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // RUN: rm -rf %t.dir // RUN: rm -rf %t.cdb // RUN: mkdir -p %t.dir diff --git a/clang/test/ClangScanDeps/symlink.cpp b/clang/test/ClangScanDeps/symlink.cpp index 1151820..5b13d88 100644 --- a/clang/test/ClangScanDeps/symlink.cpp +++ b/clang/test/ClangScanDeps/symlink.cpp @@ -1,5 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // RUN: rm -rf %t.dir // RUN: rm -rf %t.cdb diff --git a/clang/test/CodeCompletion/included-symlinks.cpp b/clang/test/CodeCompletion/included-symlinks.cpp index 6a0d261..c3e7c0ab 100644 --- a/clang/test/CodeCompletion/included-symlinks.cpp +++ b/clang/test/CodeCompletion/included-symlinks.cpp @@ -1,5 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // RUN: rm -rf %t && mkdir -p %t/real/myproj && mkdir -p %t/links // RUN: touch %t/real/foo.h && ln -s %t/real/foo.h %t/links/foo.h // RUN: touch %t/real/foobar.h && ln -s %t/real/foobar.h %t/links/foobar.h diff --git a/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c b/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c index cabff7e..8dfdc8c 100644 --- a/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c +++ b/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zicfiss -emit-llvm -o - %s -fcf-protection=return | FileCheck %s -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zicfiss -emit-llvm -o - %s | FileCheck -check-prefix=NOSHADOWSTACK %s -// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zicfiss -emit-llvm -o - %s -fcf-protection=return | FileCheck %s 
-// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zicfiss -emit-llvm -o - %s | FileCheck -check-prefix=NOSHADOWSTACK %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +zimop -emit-llvm -o - %s -fcf-protection=return | FileCheck %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +zimop -emit-llvm -o - %s | FileCheck -check-prefix=NOSHADOWSTACK %s +// RUN: %clang_cc1 -triple riscv32 -target-feature +zimop -emit-llvm -o - %s -fcf-protection=return | FileCheck %s +// RUN: %clang_cc1 -triple riscv32 -target-feature +zimop -emit-llvm -o - %s | FileCheck -check-prefix=NOSHADOWSTACK %s int foo(int *a) { return *a; } diff --git a/clang/test/CodeGen/RISCV/riscv-func-attr-target.c b/clang/test/CodeGen/RISCV/riscv-func-attr-target.c index c5189d6..5e998b7 100644 --- a/clang/test/CodeGen/RISCV/riscv-func-attr-target.c +++ b/clang/test/CodeGen/RISCV/riscv-func-attr-target.c @@ -80,16 +80,16 @@ int test_vsetvlmax_e64m1() { } //. -// CHECK: attributes #0 = { {{.*}}"target-features"="+64bit,+a,+m,+save-restore,+zaamo,+zalrsc,+zifencei,+zmmul,-relax,-zbb,-zfa" } -// CHECK: attributes #1 = { {{.*}}"target-cpu"="rocket-rv64" "target-features"="+64bit,+a,+d,+f,+m,+save-restore,+v,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-relax,-zbb,-zfa" "tune-cpu"="generic-rv64" } -// CHECK: attributes #2 = { {{.*}}"target-features"="+64bit,+a,+m,+save-restore,+zaamo,+zalrsc,+zbb,+zifencei,+zmmul,-relax,-zfa" } -// CHECK: attributes #3 = { {{.*}}"target-features"="+64bit,+a,+d,+f,+m,+save-restore,+v,+zaamo,+zalrsc,+zbb,+zicond,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-relax,-zfa" } +// CHECK: attributes #0 = { {{.*}}"target-features"="+64bit,+a,+i,+m,+save-restore,+zaamo,+zalrsc,+zifencei,+zmmul,-relax,-zbb,-zfa" } +// CHECK: attributes #1 = { {{.*}}"target-cpu"="rocket-rv64" 
"target-features"="+64bit,+a,+d,+f,+i,+m,+save-restore,+v,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-relax,-zbb,-zfa" "tune-cpu"="generic-rv64" } +// CHECK: attributes #2 = { {{.*}}"target-features"="+64bit,+a,+i,+m,+save-restore,+zaamo,+zalrsc,+zbb,+zifencei,+zmmul,-relax,-zfa" } +// CHECK: attributes #3 = { {{.*}}"target-features"="+64bit,+a,+d,+f,+i,+m,+save-restore,+v,+zaamo,+zalrsc,+zbb,+zicond,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-relax,-zfa" } // Make sure we append negative features if we override the arch -// CHECK: attributes #4 = { {{.*}}"target-features"="+64bit,+a,+c,+d,+f,+m,+save-restore,+zaamo,+zalrsc,+zbb,+zca,+zcd,+zicsr,+zifencei,+zmmul,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } -// CHECK: attributes #5 = { {{.*}}"target-features"="+64bit,+m,+save-restore,+zmmul,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } -// CHECK: attributes #6 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+a,+m,+save-restore,+zaamo,+zalrsc,+zbb,+zifencei,+zmmul,-relax,-zfa" } -// CHECK: attributes #7 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+m,+save-restore,+zmmul,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } -// CHECK: attributes #8 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+a,+c,+d,+f,+m,+save-restore,+zaamo,+zalrsc,+zca,+zcd,+zicsr,+zifencei,+zmmul,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } -// CHECK: attributes #9 = { {{.*}}"target-features"="+64bit,+a,+m,+save-restore,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,+zve32x,+zvl32b,-relax,-zbb,-zfa" } -// CHECK: attributes #11 = { {{.*}}"target-features"="+64bit,+a,+f,+m,+save-restore,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zvl32b,-relax,-zbb,-zfa" } -// CHECK: attributes #12 = { {{.*}}"target-features"="+64bit,+a,+d,+f,+m,+save-restore,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl32b,+zvl64b,-relax,-zbb,-zfa" } +// 
CHECK: attributes #4 = { {{.*}}"target-features"="+64bit,+a,+c,+d,+f,+i,+m,+save-restore,+zaamo,+zalrsc,+zbb,+zca,+zcd,+zicsr,+zifencei,+zmmul,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } +// CHECK: attributes #5 = { {{.*}}"target-features"="+64bit,+i,+m,+save-restore,+zmmul,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } +// CHECK: attributes #6 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+a,+i,+m,+save-restore,+zaamo,+zalrsc,+zbb,+zifencei,+zmmul,-relax,-zfa" } +// CHECK: attributes #7 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+i,+m,+save-restore,+zmmul,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } +// CHECK: attributes #8 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+a,+c,+d,+f,+i,+m,+save-restore,+zaamo,+zalrsc,+zca,+zcd,+zicsr,+zifencei,+zmmul,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } +// CHECK: attributes #9 = { {{.*}}"target-features"="+64bit,+a,+i,+m,+save-restore,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,+zve32x,+zvl32b,-relax,-zbb,-zfa" } +// CHECK: attributes #11 = { {{.*}}"target-features"="+64bit,+a,+f,+i,+m,+save-restore,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zvl32b,-relax,-zbb,-zfa" } +// CHECK: attributes #12 = { {{.*}}"target-features"="+64bit,+a,+d,+f,+i,+m,+save-restore,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl32b,+zvl64b,-relax,-zbb,-zfa" } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vlenb.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vlenb.c index 582d5fd..1210a78 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vlenb.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vlenb.c @@ -21,10 +21,10 @@ unsigned long test_vlenb(void) { return __riscv_vlenb(); } //. 
-// RV32: attributes #[[ATTR0:[0-9]+]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+32bit,+d,+f,+v,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" } +// RV32: attributes #[[ATTR0:[0-9]+]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+32bit,+d,+f,+i,+v,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" } // RV32: attributes #[[ATTR1:[0-9]+]] = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) } //. -// RV64: attributes #[[ATTR0:[0-9]+]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+d,+f,+v,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" } +// RV64: attributes #[[ATTR0:[0-9]+]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+d,+f,+i,+v,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" } // RV64: attributes #[[ATTR1:[0-9]+]] = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) } //. 
// RV32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index f255dbe..8223ab2 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1540,18 +1540,21 @@ __m256 test_mm256_set_m128(__m128 A, __m128 B) { // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> return _mm256_set_m128(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_set_m128((__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){1.0f, 2.0f, 3.0f, 4.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f)); __m256d test_mm256_set_m128d(__m128d A, __m128d B) { // CHECK-LABEL: test_mm256_set_m128d // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> return _mm256_set_m128d(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_set_m128d((__m128d){10.0, 20.0}, (__m128d){1.0, 2.0}), 1.0, 2.0, 10.0, 20.0)); __m256i test_mm256_set_m128i(__m128i A, __m128i B) { // CHECK-LABEL: test_mm256_set_m128i // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> return _mm256_set_m128i(A, B); } +TEST_CONSTEXPR(match_m256i(_mm256_set_m128i((__m128i){10LL, 20LL}, (__m128i){1LL, 2LL}), 1LL, 2LL, 10LL, 20LL)); __m256d test_mm256_set_pd(double A0, double A1, double A2, double A3) { // CHECK-LABEL: test_mm256_set_pd @@ -1778,18 +1781,21 @@ __m256 test_mm256_setr_m128(__m128 A, __m128 B) { // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> return _mm256_setr_m128(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_setr_m128((__m128){1.0f, 2.0f, 3.0f, 4.0f}, (__m128){10.0f, 20.0f, 30.0f, 40.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f)); __m256d test_mm256_setr_m128d(__m128d A, __m128d B) { // CHECK-LABEL: test_mm256_setr_m128d // CHECK: shufflevector <2 x 
double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> return _mm256_setr_m128d(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_setr_m128d((__m128d){1.0, 2.0}, (__m128d){10.0, 20.0}), 1.0, 2.0, 10.0, 20.0)); __m256i test_mm256_setr_m128i(__m128i A, __m128i B) { // CHECK-LABEL: test_mm256_setr_m128i // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> return _mm256_setr_m128i(A, B); } +TEST_CONSTEXPR(match_m256i(_mm256_setr_m128i((__m128i){1LL, 2LL}, (__m128i){10LL, 20LL}), 1LL, 2LL, 10LL, 20LL)); __m256d test_mm256_setr_pd(double A0, double A1, double A2, double A3) { // CHECK-LABEL: test_mm256_setr_pd diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 724a5f6..aeb1aee 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -128,12 +128,14 @@ __m256i test_mm256_avg_epu8(__m256i a, __m256i b) { // CHECK: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_avg_epu8(a, b); } +TEST_CONSTEXPR(match_v32qu(_mm256_avg_epu8((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); __m256i test_mm256_avg_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_avg_epu16 // CHECK: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_avg_epu16(a, b); } +TEST_CONSTEXPR(match_v16hu(_mm256_avg_epu16((__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); // FIXME: 
We should also lower the __builtin_ia32_pblendw128 (and similar) // functions to this IR. In the future we could delete the corresponding diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 1f67a9e..9d605ef 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1060,35 +1060,47 @@ __m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.pavg.b.512 return _mm512_avg_epu8(__A,__B); } +TEST_CONSTEXPR(match_v64qu(_mm512_avg_epu8((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64)); + __m512i test_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_avg_epu8 // CHECK: @llvm.x86.avx512.pavg.b.512 // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_avg_epu8(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v64qi(_mm512_mask_avg_epu8((__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00000000FFFFFFFF, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_avg_epu8 // CHECK: @llvm.x86.avx512.pavg.b.512 // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_avg_epu8(__U,__A,__B); } +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_avg_epu8(0x00000000FFFFFFFF, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_avg_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_avg_epu16 // CHECK: @llvm.x86.avx512.pavg.w.512 return _mm512_avg_epu16(__A,__B); } 
+TEST_CONSTEXPR(match_v32hu(_mm512_avg_epu16((__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); + __m512i test_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_avg_epu16 // CHECK: @llvm.x86.avx512.pavg.w.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_avg_epu16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_avg_epu16((__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_avg_epu16 // CHECK: @llvm.x86.avx512.pavg.w.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_avg_epu16(__U,__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_avg_epu16(0x0000FFFF, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_max_epi8(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_max_epi8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) @@ -1744,6 +1756,7 @@ __m512i test_mm512_sllv_epi16(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.psllv.w.512( return _mm512_sllv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_sllv_epi16((__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -128, 0, 528, 0, 2176, 4416, -8960, 18176, 0, 9216, 20480, 20480, 0, -16384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_sllv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_sllv_epi16 @@ -1751,6 +1764,7 @@ __m512i test_mm512_mask_sllv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_sllv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_sllv_epi16((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -128, 0, 999, 0, 999, 4416, -8960, 999, 0, 9216, 20480, 999, 999, -16384, 0, 999, 999, 999, 999, 999, 999, 0, 999, 999, 0, 999, 999, 999, 0, 0, 999, 0)); __m512i 
test_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_sllv_epi16 @@ -1758,6 +1772,7 @@ __m512i test_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_sllv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sllv_epi16(0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -128, 0, 0, 0, 0, 4416, -8960, 0, 0, 9216, 20480, 0, 0, -16384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_sll_epi16(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_sll_epi16 @@ -1841,6 +1856,7 @@ __m512i test_mm512_srlv_epi16(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.psrlv.w.512( return _mm512_srlv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_srlv_epi16((__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), 32736, 0, 8, 0, 2, 1, 511, 0, 0, 0, 0, 15, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_srlv_epi16 @@ -1848,6 +1864,7 @@ __m512i test_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_srlv_epi16(__W, __U, __A, __B); } 
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_srlv_epi16((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), 32736, 0, 999, 0, 999, 1, 511, 999, 0, 0, 0, 999, 999, 3, 0, 999, 999, 999, 999, 999, 999, 0, 999, 999, 0, 999, 999, 999, 0, 0, 999, 0)); __m512i test_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_srlv_epi16 @@ -1855,12 +1872,14 @@ __m512i test_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_srlv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srlv_epi16(0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), 32736, 0, 0, 0, 0, 1, 511, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_srav_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_srav_epi16 // CHECK: @llvm.x86.avx512.psrav.w.512( return _mm512_srav_epi16(__A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_srav_epi16((__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, 
-16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -32, 0, 8, -1, 2, 1, -1, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, -1, -1, 0, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, -1, -1)); __m512i test_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_srav_epi16 @@ -1868,6 +1887,7 @@ __m512i test_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_srav_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_srav_epi16((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -32, 0, 999, -1, 999, 1, -1, 999, -1, 0, 0, 999, 999, -1, 0, 999, 999, 999, 999, 999, 999, 0, 999, 999, 0, 999, 999, 999, 0, 0, 999, -1)); __m512i test_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_srav_epi16 @@ -1875,6 +1895,7 @@ __m512i test_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_srav_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srav_epi16(0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -32, 
0, 0, -1, 0, 1, -1, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1)); __m512i test_mm512_sra_epi16(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_sra_epi16 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 10d464c..f93216e 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -4174,6 +4174,7 @@ __m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) { // CHECK: @llvm.fshl.v16i32 return _mm512_rolv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_rolv_epi32((__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, -32769, 786432, -24577, -4194305, -10485761, 7168, 67108864, 2304, -301989889, -641, -353, -193, -1073741823, 60, 2147483640)); __m512i test_mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_rolv_epi32 @@ -4181,6 +4182,7 @@ __m512i test_mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_rolv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_rolv_epi32((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 999, 786432, 999, -4194305, -10485761, 7168, 67108864, 2304, -301989889, -641, -353, -193, -1073741823, 999, 2147483640)); __m512i test_mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_rolv_epi32 @@ -4188,12 +4190,14 @@ __m512i test_mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, 
<16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_rolv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_rolv_epi32(0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 0, 786432, 0, -4194305, -10485761, 7168, 67108864, 2304, -301989889, -641, -353, -193, -1073741823, 0, 2147483640)); __m512i test_mm512_rolv_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_rolv_epi64 // CHECK: @llvm.fshl.v8i64 return _mm512_rolv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_rolv_epi64((__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 256, -129, 864691128455135232LL, -97, 5764607523034234880LL, -4611686018427387904LL, -25, -15)); __m512i test_mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_rolv_epi64 @@ -4201,6 +4205,7 @@ __m512i test_mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_rolv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_rolv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 256, 999, 999, -97, 5764607523034234880LL, 999, 999, 999)); __m512i test_mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_rolv_epi64 @@ -4208,6 +4213,7 @@ __m512i test_mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_rolv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_rolv_epi64(0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 256, 0, 0, -97, 
5764607523034234880LL, 0, 0, 0)); __m512i test_mm512_ror_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_ror_epi32 @@ -4260,6 +4266,7 @@ __m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) { // CHECK: @llvm.fshr.v16i32 return _mm512_rorv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_rorv_epi32((__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, -131073, 49152, -1572865, -16385, -10241, 29360128, 4096, 150994944, -1153, -671088641, -1476395009, 1073741823, 112, -1073741821, -31)); __m512i test_mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_rorv_epi32 @@ -4267,6 +4274,7 @@ __m512i test_mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_rorv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_rorv_epi32((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 999, 49152, 999, -16385, -10241, 29360128, 4096, 150994944, -1153, -671088641, -1476395009, 1073741823, 112, 999, -31)); __m512i test_mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_rorv_epi32 @@ -4274,12 +4282,14 @@ __m512i test_mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_rorv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_rorv_epi32(0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 
5, 4, -3, 2, -1}), -1, 0, 49152, 0, -16385, -10241, 29360128, 4096, 150994944, -1153, -671088641, -1476395009, 1073741823, 112, 0, -31)); __m512i test_mm512_rorv_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_rorv_epi64 // CHECK: @llvm.fshr.v8i64 return _mm512_rorv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_rorv_epi64((__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 72057594037927936LL, -144115188075855873LL, 192, -1729382256910270465LL, 80, 48, 9223372036854775806LL, 9223372036854775804LL)); __m512i test_mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_rorv_epi64 @@ -4287,6 +4297,7 @@ __m512i test_mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_rorv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_rorv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 72057594037927936LL, 999, 999, -1729382256910270465LL, 80, 999, 999, 999)); __m512i test_mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_rorv_epi64 @@ -4294,6 +4305,7 @@ __m512i test_mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_rorv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_rorv_epi64(0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 72057594037927936LL, 0, 0, -1729382256910270465LL, 80, 0, 0, 0)); __m512i test_mm512_slli_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_slli_epi32 @@ -5999,6 +6011,7 @@ __m512i test_mm512_sllv_epi64(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psllv.q.512 return 
_mm512_sllv_epi64(__X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_sllv_epi64((__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 32, -68, 0, 0, 0, 1344, 0, 0)); __m512i test_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_sllv_epi64 @@ -6006,6 +6019,7 @@ __m512i test_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_sllv_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_sllv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 999, 999, 0, 999, 999, 1344, 0, 0)); __m512i test_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_sllv_epi64 @@ -6013,6 +6027,7 @@ __m512i test_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_sllv_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_sllv_epi64(0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 0, 0, 0, 0, 0, 1344, 0, 0)); __m512i test_mm512_sra_epi32(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_sra_epi32 @@ -6082,6 +6097,7 @@ __m512i test_mm512_srav_epi64(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psrav.q.512 return _mm512_srav_epi64(__X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_srav_epi64((__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 8, -5, 0, -1, 0, 0, 0, 0)); __m512i test_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srav_epi64 @@ -6089,6 +6105,7 @@ __m512i test_mm512_mask_srav_epi64(__m512i __W, 
__mmask8 __U, __m512i __X, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srav_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_srav_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 999, 999, 0, 999, 999, 0, 0, 0)); __m512i test_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srav_epi64 @@ -6096,6 +6113,7 @@ __m512i test_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srav_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_srav_epi64(0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_srl_epi32(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_srl_epi32 @@ -6165,6 +6183,7 @@ __m512i test_mm512_srlv_epi64(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psrlv.q.512 return _mm512_srlv_epi64(__X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_srlv_epi64((__m512i)(__v8di){ 16, 17, -18, 19, -20, -21, 22, -23}, (__m512i)(__v8di){ 1, 2, 3, 4, -5, -6, 7, 8}), 8, 4, 2305843009213693949, 1, 0, 0, 0, 72057594037927935)); __m512i test_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srlv_epi64 @@ -6172,6 +6191,7 @@ __m512i test_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srlv_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_srlv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x28, (__m512i)(__v8di){ 16, 17, -18, 19, -20, -21, 22, -23}, (__m512i)(__v8di){ 1, 2, 3, 4, -5, -6, 7, 8}), 999, 999, 999, 1, 
999, 0, 999, 999)); __m512i test_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srlv_epi64 @@ -6179,6 +6199,7 @@ __m512i test_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srlv_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_srlv_epi64(0x28, (__m512i)(__v8di){ 16, 17, -18, 19, -20, -21, 22, -23}, (__m512i)(__v8di){ 1, 2, 3, 4, -5, -6, 7, 8}), 0, 0, 0, 1, 0, 0, 0, 0)); __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_ternarylogic_epi32 diff --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c index 4f6139b..5600355 100644 --- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi2 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" __m512i test_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) { // CHECK-LABEL: test_mm512_mask_compress_epi16 @@ -95,6 +96,7 @@ __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shldi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 38280596832649216LL, -40532396646334464LL, 999, 999, 999, -47287796087390209LL, 999)); __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi64 @@ -102,12 +104,14 @@ __m512i 
test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shldi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 38280596832649216LL, -40532396646334464LL, 0, 0, 0, -47287796087390209LL, 0)); __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi64 // CHECK: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v8di(_mm512_shldi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 38280596832649215LL, 38280596832649216LL, -40532396646334464LL, 45035996273704959LL, -42784196460019713LL, 47287796087390208LL, -47287796087390209LL, 54043195528445951LL)); __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldi_epi32 @@ -115,6 +119,7 @@ __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shldi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 999, 999, 73727, 999, -73729, -75777, 81919, 999, 86015, 999, 999, 999, 94207, -92161, 999)); __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi32 @@ -122,12 +127,14 @@ 
__m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shldi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 0, 0, 73727, 0, -73729, -75777, 81919, 0, 86015, 0, 0, 0, 94207, -92161, 0)); __m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi32 // CHECK: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v16si(_mm512_shldi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 67584, -69632, 73727, 73728, -73729, -75777, 81919, -79873, 86015, 88063, -86017, -88065, 94207, -92161, -96256)); __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldi_epi16 @@ -135,6 +142,7 @@ __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shldi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 
13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 999, -8321, 999, 8704, -8832, 999, -8961, 9216, 9344, 999, 999, -9728, 9983, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 11136, 999, 999, 999, 11775, 11903, 999, -11905, 999)); __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi16 @@ -142,12 +150,14 @@ __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shldi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 0, -8321, 0, 8704, -8832, 0, -8961, 9216, 9344, 0, 0, -9728, 9983, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 11136, 0, 0, 0, 11775, 11903, 0, -11905, 0)); __m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi16 // CHECK: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15)) return _mm512_shldi_epi16(__A, __B, 15); } +TEST_CONSTEXPR(match_v32hi(_mm512_shldi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 8320, -8321, 8576, 8704, -8832, 8960, -8961, 9216, 9344, -9472, 9727, -9728, 9983, 10111, -10112, -10240, -10241, 10623, -10497, 10752, 11007, 11008, 11136, -11137, -11392, 11647, 11775, 
11903, -11777, -11905, 12160)); __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdi_epi64 @@ -155,6 +165,7 @@ __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shrdi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 16384, 32767, 999, 999, 999, -49153, 999)); __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi64 @@ -162,12 +173,14 @@ __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shrdi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 16384, 32767, 0, 0, 0, -49153, 0)); __m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi64 // CHECK: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shrdi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v8di(_mm512_shrdi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), -8192, 16384, 32767, -32768, -32769, 49152, -49153, -65536)); __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdi_epi32 @@ -175,6 +188,7 @@ __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> 
%{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shrdi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 2097152, 999, 999, -8388608, 999, -10485761, -12582913, -16777216, 999, -20971520, 999, 999, 999, -29360128, -29360129, 999)); __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi32 @@ -182,12 +196,14 @@ __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shrdi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shrdi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 2097152, 0, 0, -8388608, 0, -10485761, -12582913, -16777216, 0, -20971520, 0, 0, 0, -29360128, -29360129, 0)); __m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi32 // CHECK: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shrdi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v16si(_mm512_shrdi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 2097152, 4194304, 8388607, -8388608, 10485760, -10485761, -12582913, -16777216, -16777217, -20971520, -23068672, -23068673, -25165825, -29360128, -29360129, 35651583)); __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 
__U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdi_epi16 @@ -195,6 +211,7 @@ __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shrdi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), -512, 999, -1025, 999, 2560, 3583, 999, -3585, 4608, 5120, 999, 999, 7167, -7168, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 12288, 999, 999, 999, -14336, -14848, 999, -15361, 999)); __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi16 @@ -202,12 +219,14 @@ __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shrdi_epi16(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shrdi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), -512, 0, -1025, 0, 2560, 3583, 0, -3585, 4608, 5120, 0, 0, 7167, -7168, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 12288, 0, 0, 0, -14336, -14848, 0, -15361, 0)); __m512i 
test_mm512_shrdi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi16 // CHECK: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31)) return _mm512_shrdi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v32hi(_mm512_shrdi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), -512, 1024, -1025, 2048, 2560, 3583, 3584, -3585, 4608, 5120, 6143, -6144, 7167, -7168, -7680, 8703, 9215, -8705, -9728, -9729, 10752, -11264, 11776, 12288, -12289, 13823, -13824, -14336, -14848, -14849, -15361, 16384)); __m512i test_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldv_epi64 @@ -215,6 +234,7 @@ __m512i test_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shldv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shldv_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, 0xC1, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1080863910568919041LL, 17, -18, 19, -20, 21, 91, -9223372036854775804LL)); __m512i test_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldv_epi64 @@ -222,12 +242,14 @@ __m512i test_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shldv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shldv_epi64(0xC1, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, (__m512i)(__v8di){ -1, 2, 3, 
-4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1080863910568919041LL, 0, 0, 0, 0, 0, 91, -9223372036854775804LL)); __m512i test_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldv_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_shldv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_shldv_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1080863910568919041LL, 2176, -5188146770730811392LL, 639, -3458764513820540929LL, -6917529027641081856LL, 91, -9223372036854775804LL)); __m512i test_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldv_epi32 @@ -235,6 +257,7 @@ __m512i test_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shldv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shldv_epi32((__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, 0x26D8, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 32, -33, 34, 18874367, 37748736, 37, -159383552, 327155712, -40, -5248, -1476395008, 43, 44, 360, 46, -47)); __m512i test_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldv_epi32 @@ -242,12 +265,14 @@ __m512i test_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shldv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shldv_epi32(0x26D8, (__m512i)(__v16si){ 32, -33, 34, 35, 36, 
37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 0, 0, 0, 18874367, 37748736, 0, -159383552, 327155712, 0, -5248, -1476395008, 0, 0, 360, 0, 0)); __m512i test_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldv_epi32 // CHECK: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_shldv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_shldv_epi32((__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 2097152, -4325376, 573439, 18874367, 37748736, 77823, -159383552, 327155712, -10240, -5248, -1476395008, 1376, 719, 360, -1073741828, -2147483640)); __m512i test_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldv_epi16 @@ -255,6 +280,7 @@ __m512i test_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shldv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shldv_epi16((__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, 0x73314D8, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), -64, 65, 66, 536, 1088, -69, 7167, 29184, -72, 73, 10240, 75, -1216, -77, 
-78, -79, -80, -162, 82, -83, 16385, 2751, 86, 87, -22528, 11519, 5760, -91, 92, 93, 94, 95)); __m512i test_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldv_epi16 @@ -262,12 +288,14 @@ __m512i test_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shldv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shldv_epi16(0x73314D8, (__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), 0, 0, 0, 536, 1088, 0, 7167, 29184, 0, 0, 10240, 0, -1216, 0, 0, 0, -80, -162, 0, 0, 16385, 2751, 0, 0, -22528, 11519, 5760, 0, 0, 0, 0, 0)); __m512i test_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldv_epi16 // CHECK: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_shldv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_shldv_epi16((__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), -64, 130, 267, 536, 1088, -8193, 7167, 29184, -18432, -27649, 10240, 
2400, -1216, -609, -312, -32760, -80, -162, -32764, -24574, 16385, 2751, 5567, 11136, -22528, 11519, 5760, 10240, -12290, 751, 379, -16)); __m512i test_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdv_epi64 @@ -275,6 +303,7 @@ __m512i test_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shrdv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shrdv_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, 0xC1, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1, 17, -18, 19, -20, 21, 4611686018427387909LL, 17)); __m512i test_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdv_epi64 @@ -282,12 +311,14 @@ __m512i test_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shrdv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shrdv_epi64(0xC1, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1, 0, 0, 0, 0, 0, 4611686018427387909LL, 17)); __m512i test_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdv_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_shrdv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_shrdv_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1, 288230376151711744LL, 255, -2305843009213693952LL, -65, 48, 4611686018427387909LL, 17)); __m512i test_mm512_mask_shrdv_epi32(__m512i __S, 
__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdv_epi32 @@ -295,6 +326,7 @@ __m512i test_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shrdv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shrdv_epi32((__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, 0x26D8, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 32, -33, 34, -32768, 20480, 37, 8191, 4096, -40, 369098751, 704, 43, 44, -1073741819, 46, -47)); __m512i test_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdv_epi32 @@ -302,12 +334,14 @@ __m512i test_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shrdv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shrdv_epi32(0x26D8, (__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 0, 0, 0, -32768, 20480, 0, 8191, 4096, 0, 369098751, 704, 0, 0, -1073741819, 0, 0)); __m512i test_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdv_epi32 // CHECK: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_shrdv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_shrdv_epi32((__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 
16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 65536, 98303, -786432, -32768, 20480, -12582912, 8191, 4096, 167772159, 369098751, 704, 1610612737, 805306370, -1073741819, -60, 33)); __m512i test_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdv_epi16 @@ -315,6 +349,7 @@ __m512i test_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shrdv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shrdv_epi16((__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, 0x73314D8, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), -64, 65, 66, -32760, 20484, -69, -448, 1151, -72, 73, 704, 75, -8197, -77, -78, -79, -80, 32727, 82, -83, 336, 20482, 86, 87, 6655, -13312, 27649, -91, 92, 93, 94, 95)); __m512i test_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdv_epi16 @@ -322,10 +357,12 @@ __m512i test_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shrdv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shrdv_epi16(0x73314D8, (__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 
28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), 0, 0, 0, -32760, 20484, 0, -448, 1151, 0, 0, 704, 0, -8197, 0, 0, 0, -80, 32727, 0, 0, 336, 20482, 0, 0, 6655, -13312, 27649, 0, 0, 0, 0, 0)); __m512i test_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdv_epi16 // CHECK: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_shrdv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_shrdv_epi16((__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), -64, 32, 16400, -32760, 20484, -161, -448, 1151, 2559, -1280, 704, 24578, -8197, 24566, -20, 33, -80, 32727, 76, 167, 336, 20482, -23551, 12288, 6655, -13312, 27649, 927, -464, 16395, 16407, -64)); diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 5690c00..9daecd0 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -6036,6 +6036,7 @@ __m128i test_mm_rolv_epi32(__m128i __A, __m128i __B) { // CHECK: llvm.fshl.v4i32 return _mm_rolv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_rolv_epi32((__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 16, 16, -1073741824, 2147483646)); __m128i test_mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_rolv_epi32 @@ -6043,6 +6044,7 @@ __m128i 
test_mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_rolv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_rolv_epi32((__m128i)(__v4si){ 999, 999, 999, 999}, 0x3, (__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 16, 16, 999, 999)); __m128i test_mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_rolv_epi32 @@ -6050,12 +6052,14 @@ __m128i test_mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_rolv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_rolv_epi32(0x3, (__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 16, 16, 0, 0)); __m256i test_mm256_rolv_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_rolv_epi32 // CHECK: @llvm.fshl.v8i32 return _mm256_rolv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_rolv_epi32((__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, -33554433, 201326592, 128, -65, -1073741824, -25, 16)); __m256i test_mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_rolv_epi32 @@ -6063,6 +6067,7 @@ __m256i test_mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m25 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_rolv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_rolv_epi32((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x69, (__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, 999, 999, 128, 999, -1073741824, -25, 999)); __m256i test_mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_rolv_epi32 @@ -6070,12 +6075,14 @@ __m256i 
test_mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_rolv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_rolv_epi32(0x69, (__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, 0, 0, 128, 0, -1073741824, -25, 0)); __m128i test_mm_rolv_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_rolv_epi64 // CHECK: @llvm.fshl.v2i64 return _mm_rolv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_rolv_epi64((__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 4, 4)); __m128i test_mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_rolv_epi64 @@ -6083,6 +6090,7 @@ __m128i test_mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_rolv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_rolv_epi64((__m128i)(__v2di){ 999, 999}, 0x2, (__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 999, 4)); __m128i test_mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_rolv_epi64 @@ -6090,12 +6098,14 @@ __m128i test_mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_rolv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_rolv_epi64(0x2, (__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 0, 4)); __m256i test_mm256_rolv_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_rolv_epi64 // CHECK: @llvm.fshl.v4i64 return _mm256_rolv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_rolv_epi64((__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 16, 9223372036854775807LL, 8)); __m256i test_mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: 
test_mm256_mask_rolv_epi64 @@ -6103,6 +6113,7 @@ __m256i test_mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m25 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_rolv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_rolv_epi64((__m256i)(__v4di){ 999, 999, 999, 999}, 0xD, (__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 999, 9223372036854775807LL, 8)); __m256i test_mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_rolv_epi64 @@ -6110,6 +6121,7 @@ __m256i test_mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_rolv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_rolv_epi64(0xD, (__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 0, 9223372036854775807LL, 8)); __m128i test_mm_ror_epi32(__m128i __A) { // CHECK-LABEL: test_mm_ror_epi32 @@ -6208,6 +6220,7 @@ __m128i test_mm_rorv_epi32(__m128i __A, __m128i __B) { // CHECK: @llvm.fshr.v4i32 return _mm_rorv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_rorv_epi32((__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 268435456, 1073741824, 12, -7)); __m128i test_mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_rorv_epi32 @@ -6215,6 +6228,7 @@ __m128i test_mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_rorv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_rorv_epi32((__m128i)(__v4si){ 999, 999, 999, 999}, 0x3, (__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 268435456, 1073741824, 999, 999)); __m128i test_mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_rorv_epi32 @@ -6222,12 +6236,14 @@ 
__m128i test_mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_rorv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_rorv_epi32(0x3, (__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 268435456, 1073741824, 0, 0)); __m256i test_mm256_rorv_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_rorv_epi32 // CHECK: @llvm.fshr.v8i32 return _mm256_rorv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_rorv_epi32((__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, -129, 192, 536870912, -1073741825, 48, 2147483646, 4)); __m256i test_mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_rorv_epi32 @@ -6235,6 +6251,7 @@ __m256i test_mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m25 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_rorv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_rorv_epi32((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x69, (__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, 999, 999, 536870912, 999, 48, 2147483646, 999)); __m256i test_mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_rorv_epi32 @@ -6242,12 +6259,14 @@ __m256i test_mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_rorv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_rorv_epi32(0x69, (__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, 0, 0, 536870912, 0, 48, 2147483646, 0)); __m128i test_mm_rorv_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_rorv_epi64 // CHECK: @llvm.fshr.v2i64 return 
_mm_rorv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_rorv_epi64((__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 4611686018427387904LL, 1)); __m128i test_mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_rorv_epi64 @@ -6255,6 +6274,7 @@ __m128i test_mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_rorv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_rorv_epi64((__m128i)(__v2di){ 999, 999}, 0x2, (__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 999, 1)); __m128i test_mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_rorv_epi64 @@ -6262,12 +6282,14 @@ __m128i test_mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_rorv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_rorv_epi64(0x2, (__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 0, 1)); __m256i test_mm256_rorv_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_rorv_epi64 // CHECK: @llvm.fshr.v4i64 return _mm256_rorv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_rorv_epi64((__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 4611686018427387904LL, -9, 2)); __m256i test_mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_rorv_epi64 @@ -6275,6 +6297,7 @@ __m256i test_mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m25 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_rorv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_rorv_epi64((__m256i)(__v4di){ 999, 999, 999, 999}, 0xD, (__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 999, -9, 2)); __m256i test_mm256_maskz_rorv_epi64(__mmask8 __U, 
__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_rorv_epi64 @@ -6282,6 +6305,7 @@ __m256i test_mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_rorv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_rorv_epi64(0xD, (__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 0, -9, 2)); __m128i test_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_sllv_epi64 @@ -6799,6 +6823,7 @@ __m128i test_mm_srav_epi64(__m128i __X, __m128i __Y) { // CHECK: @llvm.x86.avx512.psrav.q.128 return _mm_srav_epi64(__X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_srav_epi64((__m128i)(__v2di){ 4, 5}, (__m128i)(__v2di){ 1, 2}), 2, 1)); __m128i test_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_srav_epi64 @@ -6806,6 +6831,7 @@ __m128i test_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_srav_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_mask_srav_epi64((__m128i)(__v2di){ 999, 999}, 0x0, (__m128i)(__v2di){ 4, 5}, (__m128i)(__v2di){ 1, 2}), 999, 999)); __m128i test_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_maskz_srav_epi64 @@ -6813,12 +6839,14 @@ __m128i test_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_srav_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_srav_epi64(0x0, (__m128i)(__v2di){ 4, 5}, (__m128i)(__v2di){ 1, 2}), 0, 0)); __m256i test_mm256_srav_epi64(__m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_srav_epi64 // CHECK: @llvm.x86.avx512.psrav.q.256 return _mm256_srav_epi64(__X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_srav_epi64((__m256i)(__v4di){ -8, 
9, -10, -11}, (__m256i)(__v4di){ 1, 2, -3, -4}), -4, 2, -1, -1)); __m256i test_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_srav_epi64 @@ -6826,6 +6854,7 @@ __m256i test_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m25 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_srav_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_srav_epi64((__m256i)(__v4di){ 999, 999, 999, 999}, 0xA, (__m256i)(__v4di){ -8, 9, -10, -11}, (__m256i)(__v4di){ 1, 2, -3, -4}), 999, 2, 999, -1)); __m256i test_mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_maskz_srav_epi64 @@ -6833,6 +6862,7 @@ __m256i test_mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_srav_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_srav_epi64(0xA, (__m256i)(__v4di){ -8, 9, -10, -11}, (__m256i)(__v4di){ 1, 2, -3, -4}), 0, 2, 0, -1)); void test_mm_store_epi32(void *__P, __m128i __A) { // CHECK-LABEL: test_mm_store_epi32 diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index cadfd46..d62235a 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -1206,48 +1206,64 @@ __m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i _ // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_avg_epu8(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v16qi(_mm_mask_avg_epu8((__m128i)(__v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i 
test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_avg_epu8 // CHECK: @llvm.x86.sse2.pavg.b // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_maskz_avg_epu8(__U,__A,__B); } +TEST_CONSTEXPR(match_v16qi(_mm_maskz_avg_epu8(0x00FF, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_avg_epu8 // CHECK: @llvm.x86.avx2.pavg.b // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_avg_epu8(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v32qi(_mm256_mask_avg_epu8((__m256i)(__v32qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_avg_epu8 // CHECK: @llvm.x86.avx2.pavg.b // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_avg_epu8(__U,__A,__B); } +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_avg_epu8(0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_avg_epu16 // CHECK: @llvm.x86.sse2.pavg.w // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_avg_epu16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_avg_epu16((__m128i)(__v8hi){0, 1, 2, 3, 0, 0, 0, 0}, 0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}), 1, 2, 3, 4, 0, 0, 0, 0)); + __m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_avg_epu16 // CHECK: @llvm.x86.sse2.pavg.w // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_avg_epu16(__U,__A,__B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_avg_epu16(0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}), 1, 2, 3, 4, 0, 0, 0, 0)); + __m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_avg_epu16 // CHECK: @llvm.x86.avx2.pavg.w // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_avg_epu16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_avg_epu16((__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_avg_epu16 // CHECK: @llvm.x86.avx2.pavg.w // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_avg_epu16(__U,__A,__B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_avg_epu16(0x00FF, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16}, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epi8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) @@ -2209,6 +2225,7 @@ __m256i test_mm256_sllv_epi16(__m256i __A, __m256i __B) { // CHECK: @llvm.x86.avx512.psllv.w.256( return _mm256_sllv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_sllv_epi16((__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), -64, 0, -272, 560, -1152, -2368, 0, -9984, 0, 0, 20480, 0, -32768, 0, 0, 0)); __m256i test_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_sllv_epi16 @@ -2216,6 +2233,7 @@ __m256i test_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m2 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_sllv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_sllv_epi16((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 999, 0, -272, 999, -1152, 999, 0, 999, 0, 0, 20480, 0, -32768, 0, 0, 0)); __m256i test_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_sllv_epi16 @@ -2223,12 +2241,14 @@ __m256i test_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_sllv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_sllv_epi16(0xFF56, 
(__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 0, 0, -272, 0, -1152, 0, 0, 0, 0, 0, 20480, 0, -32768, 0, 0, 0)); __m128i test_mm_sllv_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_sllv_epi16 // CHECK: @llvm.x86.avx512.psllv.w.128( return _mm_sllv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_sllv_epi16((__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 32, 68, 0, 0, -640, 0, 0, 5888)); __m128i test_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_sllv_epi16 @@ -2236,6 +2256,7 @@ __m128i test_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_sllv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_sllv_epi16((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 32, 68, 999, 999, -640, 999, 999, 5888)); __m128i test_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_sllv_epi16 @@ -2243,6 +2264,7 @@ __m128i test_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_sllv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_sllv_epi16(0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 32, 68, 0, 0, -640, 0, 0, 5888)); __m128i test_mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_sll_epi16 @@ -2339,6 +2361,7 @@ __m256i test_mm256_srlv_epi16(__m256i __A, __m256i __B) { // CHECK: @llvm.x86.avx512.psrlv.w.256( return _mm256_srlv_epi16(__A, 
__B); } +TEST_CONSTEXPR(match_v16hi(_mm256_srlv_epi16((__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 32752, 0, 8187, 2, 2046, 1023, 0, 255, 0, 0, 0, 0, 0, 0, 1, 0)); __m256i test_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_srlv_epi16 @@ -2346,6 +2369,7 @@ __m256i test_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m2 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_srlv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_srlv_epi16((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 999, 0, 8187, 999, 2046, 999, 0, 999, 0, 0, 0, 0, 0, 0, 1, 0)); __m256i test_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_srlv_epi16 @@ -2353,12 +2377,14 @@ __m256i test_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_srlv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srlv_epi16(0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 0, 0, 8187, 0, 2046, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0)); __m128i test_mm_srlv_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_srlv_epi16 // CHECK: @llvm.x86.avx512.psrlv.w.128( return _mm_srlv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_srlv_epi16((__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, 
-4, 5, -6, -7, 8}), 8, 4, 0, 0, 2047, 0, 0, 0)); __m128i test_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_srlv_epi16 @@ -2366,6 +2392,7 @@ __m128i test_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_srlv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_srlv_epi16((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 999, 999, 2047, 999, 999, 0)); __m128i test_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_srlv_epi16 @@ -2373,6 +2400,7 @@ __m128i test_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_srlv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_srlv_epi16(0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 0, 0, 2047, 0, 0, 0)); __m128i test_mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_srl_epi16 @@ -2466,6 +2494,7 @@ __m256i test_mm256_srav_epi16(__m256i __A, __m256i __B) { // CHECK: @llvm.x86.avx512.psrav.w.256( return _mm256_srav_epi16(__A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_srav_epi16((__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), -16, 0, -5, 2, -2, -1, 0, -1, -1, -1, 0, 0, 0, -1, -1, -1)); __m256i test_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_srav_epi16 @@ -2473,6 +2502,7 @@ __m256i test_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m2 // CHECK: select <16 x 
i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_srav_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_srav_epi16((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 999, 0, -5, 999, -2, 999, 0, 999, -1, -1, 0, 0, 0, -1, -1, -1)); __m256i test_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_srav_epi16 @@ -2480,12 +2510,14 @@ __m256i test_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_srav_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srav_epi16(0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 0, 0, -5, 0, -2, 0, 0, 0, -1, -1, 0, 0, 0, -1, -1, -1)); __m128i test_mm_srav_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_srav_epi16 // CHECK: @llvm.x86.avx512.psrav.w.128( return _mm_srav_epi16(__A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_srav_epi16((__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 0, -1, -1, -1, 0, 0)); __m128i test_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_srav_epi16 @@ -2493,6 +2525,7 @@ __m128i test_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_srav_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_srav_epi16((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, 
-21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 999, 999, -1, 999, 999, 0)); __m128i test_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_srav_epi16 @@ -2500,6 +2533,7 @@ __m128i test_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_srav_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_srav_epi16(0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 0, 0, -1, 0, 0, 0)); __m128i test_mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_sra_epi16 diff --git a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c index 7259325..e1e8578 100644 --- a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vl -target-feature +avx512vbmi2 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" __m128i test_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { // CHECK-LABEL: test_mm_mask_compress_epi16 @@ -179,6 +180,7 @@ __m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_shldi_epi64(((__m256i)(__v4di){ 999, 999, 999, 999}), 0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -7881299347898369LL, -10133099161583616LL, 999, 12384898975268864LL)); __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldi_epi64 @@ -186,12 
+188,14 @@ __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shldi_epi64(0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -7881299347898369LL, -10133099161583616LL, 0, 12384898975268864LL)); __m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldi_epi64 // CHECK: call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31)) return _mm256_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v4di(_mm256_shldi_epi64(((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -7881299347898369LL, -10133099161583616LL, 11258999068426240LL, 12384898975268864LL)); __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldi_epi64 @@ -199,6 +203,7 @@ __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shldi_epi64(((__m128i)(__v2di){ 999, 999}), 0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 999, -160)); __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldi_epi64 @@ -206,12 +211,14 @@ __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_shldi_epi64(0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 0, -160)); __m128i test_mm_shldi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldi_epi64 // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 
x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v2di(_mm_shldi_epi64(((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), -97, -160)); __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldi_epi32 @@ -219,6 +226,7 @@ __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shldi_epi32(((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}), 0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 999, 999, 9216, -9217, 10240, 999, -11264, -11776)); __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldi_epi32 @@ -226,12 +234,14 @@ __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shldi_epi32(0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 0, 0, 9216, -9217, 10240, 0, -11264, -11776)); __m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldi_epi32 // CHECK: call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31)) return _mm256_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v8si(_mm256_shldi_epi32(((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), -8192, 9215, 9216, -9217, 10240, 10752, -11264, -11776)); __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: 
test_mm_mask_shldi_epi32 @@ -239,6 +249,7 @@ __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shldi_epi32(((__m128i)(__v4si){ 999, 999, 999, 999}), 0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 8192, 999, 11263, -11264)); __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldi_epi32 @@ -246,12 +257,14 @@ __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shldi_epi32(0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 8192, 0, 11263, -11264)); __m128i test_mm_shldi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldi_epi32 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v4si(_mm_shldi_epi32(((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 8192, 9216, 11263, -11264)); __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldi_epi16 @@ -259,6 +272,7 @@ __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shldi_epi16(((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, 
-4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), -32768, 999, 999, 999, 999, -27648, 999, -24577, 25599, 999, 22528, 999, 21503, 999, 999, 999)); __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldi_epi16 @@ -266,12 +280,14 @@ __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shldi_epi16(0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), -32768, 0, 0, 0, 0, -27648, 0, -24577, 25599, 0, 22528, 0, 21503, 0, 0, 0)); __m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldi_epi16 // CHECK: call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shldi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v16hi(_mm256_shldi_epi16(((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), -32768, 32767, 30720, -28673, 29695, -27648, 27647, -24577, 25599, 23552, 22528, 21504, 21503, 20479, 19455, 18431)); __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldi_epi16 @@ -279,6 +295,7 @@ __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shldi_epi16(((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), 
((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 999, 999, -4608, -4864, 5375, 999, 999, 6143)); __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldi_epi16 @@ -286,12 +303,14 @@ __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shldi_epi16(0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 0, 0, -4608, -4864, 5375, 0, 0, 6143)); __m128i test_mm_shldi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldi_epi16 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shldi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v8hi(_mm_shldi_epi16(((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 4351, 4607, -4608, -4864, 5375, -5376, -5632, 6143)); __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdi_epi64 @@ -299,6 +318,7 @@ __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shrdi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_shrdi_epi64(((__m256i)(__v4di){ 999, 999, 999, 999}), 0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -1, 49151, 999, 65536)); __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdi_epi64 @@ -306,12 +326,14 @@ __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shrdi_epi64(__U, __A, 
__B, 63); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shrdi_epi64(0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -1, 49151, 0, 65536)); __m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdi_epi64 // CHECK: call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31) return _mm256_shrdi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v4di(_mm256_shrdi_epi64(((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -1, 49151, 49152, 65536)); __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdi_epi64 @@ -319,6 +341,7 @@ __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shrdi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shrdi_epi64(((__m128i)(__v2di){ 999, 999}), 0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 999, 1729382256910270463LL)); __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdi_epi64 @@ -326,12 +349,14 @@ __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shrdi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_shrdi_epi64(0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 0, 1729382256910270463LL)); __m128i test_mm_shrdi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdi_epi64 // CHECK: call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shrdi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v2di(_mm_shrdi_epi64(((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), -1, 1729382256910270463LL)); __m256i 
test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdi_epi32 @@ -339,6 +364,7 @@ __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shrdi_epi32(((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}), 0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 999, 999, 25165824, -25165825, 41943040, 999, 67108863, 75497471)); __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdi_epi32 @@ -346,12 +372,14 @@ __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shrdi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shrdi_epi32(0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 0, 0, 25165824, -25165825, 41943040, 0, 67108863, 75497471)); __m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdi_epi32 // CHECK: call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31) return _mm256_shrdi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v8si(_mm256_shrdi_epi32(((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 16777215, -16777216, 25165824, -25165825, 41943040, 50331648, 67108863, 75497471)); __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdi_epi32 @@ -359,6 +387,7 @@ __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> 
%{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shrdi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shrdi_epi32(((__m128i)(__v4si){ 999, 999, 999, 999}), 0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 4194304, 999, -12582912, 20971519)); __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdi_epi32 @@ -366,12 +395,14 @@ __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shrdi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shrdi_epi32(0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 4194304, 0, -12582912, 20971519)); __m128i test_mm_shrdi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdi_epi32 // CHECK: call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shrdi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v4si(_mm_shrdi_epi32(((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 4194304, 8388608, -12582912, 20971519)); __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdi_epi16 @@ -379,6 +410,7 @@ __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shrdi_epi16(((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), 64, 999, 999, 999, 999, 384, 999, -512, -513, 999, 767, 999, -769, 999, 999, 
999)); __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdi_epi16 @@ -386,12 +418,14 @@ __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shrdi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shrdi_epi16(0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), 64, 0, 0, 0, 0, 384, 0, -512, -513, 0, 767, 0, -769, 0, 0, 0)); __m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdi_epi16 // CHECK: call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shrdi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v16hi(_mm256_shrdi_epi16(((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), 64, -65, 255, -256, -257, 384, -385, -512, -513, 703, 767, 831, -769, -833, -897, -961)); __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdi_epi16 @@ -399,6 +433,7 @@ __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shrdi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shrdi_epi16(((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 999, 999, 1023, 1279, -1280, 999, 999, -2048)); __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: 
test_mm_maskz_shrdi_epi16 @@ -406,12 +441,14 @@ __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shrdi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shrdi_epi16(0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 0, 0, 1023, 1279, -1280, 0, 0, -2048)); __m128i test_mm_shrdi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdi_epi16 // CHECK: call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shrdi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v8hi(_mm_shrdi_epi16(((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), -256, -512, 1023, 1279, -1280, 1791, 2047, -2048)); __m256i test_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldv_epi64 @@ -419,6 +456,7 @@ __m256i test_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shldv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_shldv_epi64((__m256i)(__v4di){ -8, 9, 10, -11}, 0x9, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -8070450532247928833LL, 9, 10, -22)); __m256i test_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldv_epi64 @@ -426,12 +464,14 @@ __m256i test_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shldv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shldv_epi64(0x9, (__m256i)(__v4di){ -8, 9, 10, -11}, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), 
-8070450532247928833LL, 0, 0, -22)); __m256i test_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldv_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_shldv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_shldv_epi64((__m256i)(__v4di){ -8, 9, 10, -11}, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -8070450532247928833LL, 4611686018427387903LL, 43, -22)); __m128i test_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldv_epi64 @@ -439,6 +479,7 @@ __m128i test_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shldv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shldv_epi64((__m128i)(__v2di){ -4, -5}, 0x1, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -13, -5)); __m128i test_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldv_epi64 @@ -446,12 +487,14 @@ __m128i test_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shldv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_shldv_epi64(0x1, (__m128i)(__v2di){ -4, -5}, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -13, 0)); __m128i test_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldv_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_shldv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_shldv_epi64((__m128i)(__v2di){ -4, -5}, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -13, -10)); __m256i test_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // 
CHECK-LABEL: test_mm256_mask_shldv_epi32 @@ -459,6 +502,7 @@ __m256i test_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shldv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shldv_epi32((__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, 0xDF, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 268435456, -2049, -1152, 1879048191, -320, -21, -85, -4)); __m256i test_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldv_epi32 @@ -466,12 +510,14 @@ __m256i test_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shldv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shldv_epi32(0xDF, (__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 268435456, -2049, -1152, 1879048191, -320, 0, -85, -4)); __m256i test_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldv_epi32 // CHECK: call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_shldv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_shldv_epi32((__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 268435456, -2049, -1152, 1879048191, -320, -161, -85, -4)); __m128i test_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldv_epi32 @@ -479,6 +525,7 @@ __m128i test_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return 
_mm_mask_shldv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shldv_epi32((__m128i)(__v4si){ -8, -9, -10, -11}, 0xD, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), -2147483648, -9, -1073741825, -22)); __m128i test_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldv_epi32 @@ -486,12 +533,14 @@ __m128i test_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shldv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shldv_epi32(0xD, (__m128i)(__v4si){ -8, -9, -10, -11}, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), -2147483648, 0, -1073741825, -22)); __m128i test_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldv_epi32 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_shldv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_shldv_epi32((__m128i)(__v4si){ -8, -9, -10, -11}, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), -2147483648, -1, -1073741825, -22)); __m256i test_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldv_epi16 @@ -499,6 +548,7 @@ __m256i test_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shldv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shldv_epi16((__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, 0x12D6, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 32, -1, -32768, 35, -561, 37, 27647, -19968, -40, 21503, -42, 43, 16384, 45, 
-46, 47)); __m256i test_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldv_epi16 @@ -506,12 +556,14 @@ __m256i test_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shldv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shldv_epi16(0x12D6, (__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 0, -1, -32768, 0, -561, 0, 27647, -19968, 0, 21503, 0, 0, 16384, 0, 0, 0)); __m256i test_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldv_epi16 // CHECK: call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_shldv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_shldv_epi16((__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 32, -1, -32768, 287, -561, 1215, 27647, -19968, -9985, 21503, -2625, 24575, 16384, 360, -32765, 95)); __m128i test_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldv_epi16 @@ -519,6 +571,7 @@ __m128i test_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shldv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shldv_epi16((__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, 0x3A, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 
5, 4, 3, 2, -1}), -16, 8704, -18, -577, 335, 168, 22, -23)); __m128i test_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldv_epi16 @@ -526,12 +579,14 @@ __m128i test_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shldv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shldv_epi16(0x3A, (__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), 0, 8704, 0, -577, 335, 168, 0, 0)); __m128i test_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldv_epi16 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_shldv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_shldv_epi16((__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), -4096, 8704, -18432, -577, 335, 168, 91, -4)); __m256i test_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdv_epi64 @@ -539,6 +594,7 @@ __m256i test_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shrdv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_shrdv_epi64((__m256i)(__v4di){ -8, 9, 10, -11}, 0x9, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -1, 9, 10, 9223372036854775802LL)); __m256i test_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdv_epi64 @@ -546,12 +602,14 @@ __m256i test_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m // CHECK: select <4 x i1> %{{.*}}, <4 x i64> 
%{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shrdv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shrdv_epi64(0x9, (__m256i)(__v4di){ -8, 9, 10, -11}, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -1, 0, 0, 9223372036854775802LL)); __m256i test_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdv_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_shrdv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_shrdv_epi64((__m256i)(__v4di){ -8, 9, 10, -11}, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -1, -16, 4611686018427387906LL, 9223372036854775802LL)); __m128i test_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdv_epi64 @@ -559,6 +617,7 @@ __m128i test_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shrdv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shrdv_epi64((__m128i)(__v2di){ -4, -5}, 0x1, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -1, -5)); __m128i test_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdv_epi64 @@ -566,12 +625,14 @@ __m128i test_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shrdv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_shrdv_epi64(0x1, (__m128i)(__v2di){ -4, -5}, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -1, 0)); __m128i test_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdv_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_shrdv_epi64(__S, __A, __B); 
} +TEST_CONSTEXPR(match_v2di(_mm_shrdv_epi64((__m128i)(__v2di){ -4, -5}, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -1, 9223372036854775805LL)); __m256i test_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdv_epi32 @@ -579,6 +640,7 @@ __m256i test_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shrdv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shrdv_epi32((__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, 0xDF, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 256, -33554433, 268435455, -97, 1610612734, -21, 2147483642, -16)); __m256i test_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdv_epi32 @@ -586,12 +648,14 @@ __m256i test_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shrdv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shrdv_epi32(0xDF, (__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 256, -33554433, 268435455, -97, 1610612734, 0, 2147483642, -16)); __m256i test_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdv_epi32 // CHECK: call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_shrdv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_shrdv_epi32((__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 256, -33554433, 268435455, -97, 1610612734, 1610612733, 2147483642, -16)); __m128i 
test_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdv_epi32 @@ -599,6 +663,7 @@ __m128i test_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shrdv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shrdv_epi32((__m128i)(__v4si){ -8, -9, -10, -11}, 0xD, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), 31, -9, -9, 2147483642)); __m128i test_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdv_epi32 @@ -606,12 +671,14 @@ __m128i test_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shrdv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shrdv_epi32(0xD, (__m128i)(__v4si){ -8, -9, -10, -11}, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), 31, 0, -9, 2147483642)); __m128i test_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdv_epi32 // CHECK: call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_shrdv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_shrdv_epi32((__m128i)(__v4si){ -8, -9, -10, -11}, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), 31, -9, -9, 2147483642)); __m256i test_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdv_epi16 @@ -619,6 +686,7 @@ __m256i test_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shrdv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shrdv_epi16((__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, 
-46, 47}, 0x12D6, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 32, -3, 12, 35, -16387, 37, -385, 1151, -40, -1280, -42, 43, 223, 45, -46, 47)); __m256i test_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdv_epi16 @@ -626,12 +694,14 @@ __m256i test_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shrdv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shrdv_epi16(0x12D6, (__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 0, -3, 12, 0, -16387, 0, -385, 1151, 0, -1280, 0, 0, 223, 0, 0, 0)); __m256i test_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdv_epi16 // CHECK: call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_shrdv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_shrdv_epi16((__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 32, -3, 12, -32764, -16387, -12287, -385, 1151, -2049, -1280, -10241, -384, 223, -16379, 63, 23)); __m128i test_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdv_epi16 @@ -639,6 +709,7 @@ __m128i test_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x 
i16> %{{.*}} return _mm_mask_shrdv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shrdv_epi16((__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, 0x3A, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), -16, 256, -18, -6145, -20479, -16382, 22, -23)); __m128i test_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdv_epi16 @@ -646,10 +717,12 @@ __m128i test_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shrdv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shrdv_epi16(0x3A, (__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), 0, 256, 0, -6145, -20479, -16382, 0, 0)); __m128i test_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdv_epi16 // CHECK: call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_shrdv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_shrdv_epi16((__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), 511, 256, 255, -6145, -20479, -16382, 16389, -15)); diff --git a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c index 3de4cca..f63b5c6 100644 --- a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c @@ -7,41 +7,41 @@ __m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_dpbusd_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> 
%{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B); } __m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_dpbusd_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B); } __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusd_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusd_epi32(__S, __A, __B); } __m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_dpbusds_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B); } __m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_dpbusds_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} 
return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B); } __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusds_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusds_epi32(__S, __A, __B); } @@ -87,41 +87,41 @@ __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_dpbusd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_dpbusd_epi32(__S, __U, __A, __B); } __m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_dpbusd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_dpbusd_epi32(__U, __S, __A, __B); } __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusd_epi32(__S, __A, __B); } __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: 
test_mm_mask_dpbusds_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_dpbusds_epi32(__S, __U, __A, __B); } __m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_dpbusds_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_dpbusds_epi32(__U, __S, __A, __B); } __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusds_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusds_epi32(__S, __A, __B); } diff --git a/clang/test/CodeGen/X86/avx512vnni-builtins.c b/clang/test/CodeGen/X86/avx512vnni-builtins.c index a0177b3..afe8045 100644 --- a/clang/test/CodeGen/X86/avx512vnni-builtins.c +++ b/clang/test/CodeGen/X86/avx512vnni-builtins.c @@ -7,41 +7,41 @@ __m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_dpbusd_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_dpbusd_epi32(__S, __U, 
__A, __B); } __m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_dpbusd_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B); } __m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_dpbusd_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_dpbusd_epi32(__S, __A, __B); } __m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_dpbusds_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B); } __m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_dpbusds_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B); } __m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) 
{ // CHECK-LABEL: test_mm512_dpbusds_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_dpbusds_epi32(__S, __A, __B); } diff --git a/clang/test/CodeGen/X86/avxvnni-builtins.c b/clang/test/CodeGen/X86/avxvnni-builtins.c index bb28a35..7948e0d 100644 --- a/clang/test/CodeGen/X86/avxvnni-builtins.c +++ b/clang/test/CodeGen/X86/avxvnni-builtins.c @@ -7,13 +7,13 @@ __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusd_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusd_epi32(__S, __A, __B); } __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusds_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusds_epi32(__S, __A, __B); } @@ -31,13 +31,13 @@ __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusd_epi32(__S, __A, __B); } __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusds_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> 
%{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusds_epi32(__S, __A, __B); } @@ -55,13 +55,13 @@ __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) { __m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusd_avx_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusd_avx_epi32(__S, __A, __B); } __m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusds_avx_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusds_avx_epi32(__S, __A, __B); } @@ -79,13 +79,13 @@ __m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { __m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusd_avx_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusd_avx_epi32(__S, __A, __B); } __m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusds_avx_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusds_avx_epi32(__S, __A, __B); } diff --git 
a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index b19e8238..43d9ec5 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -123,12 +123,14 @@ __m64 test_mm_avg_pu8(__m64 a, __m64 b) { // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b( return _mm_avg_pu8(a, b); } +TEST_CONSTEXPR(match_v8qu(_mm_avg_pu8((__m64)(__v8qu){0, 1, 2, 3, 18, 15, 12, 20}, (__m64)(__v8qu){0, 1, 2, 3, 16, 3, 20, 10}), 0, 1, 2, 3, 17, 9, 16, 15)); __m64 test_mm_avg_pu16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_avg_pu16 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w( return _mm_avg_pu16(a, b); } +TEST_CONSTEXPR(match_v4hu(_mm_avg_pu16((__m64)(__v4hu){18, 15, 12, 20}, (__m64)(__v4hu){16, 3, 20, 10}), 17, 9, 16, 15)); __m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_cmpeq_pi8 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index f5de506..0ba32bb 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -134,12 +134,14 @@ __m128i test_mm_avg_epu8(__m128i A, __m128i B) { // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_avg_epu8(A, B); } +TEST_CONSTEXPR(match_v16qu(_mm_avg_epu8((__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); __m128i test_mm_avg_epu16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_avg_epu16 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_avg_epu16(A, B); } +TEST_CONSTEXPR(match_v8hu(_mm_avg_epu16((__m128i)(__v8hu){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hu){1, 2, 3, 4, 5, 6, 7, 8}), 1, 2, 3, 4, 5, 6, 7, 8)); __m128i test_mm_bslli_si128(__m128i A) { // CHECK-LABEL: test_mm_bslli_si128 diff --git a/clang/test/CodeGen/X86/xop-builtins.c 
b/clang/test/CodeGen/X86/xop-builtins.c index 994fc7b..a3cff2c 100644 --- a/clang/test/CodeGen/X86/xop-builtins.c +++ b/clang/test/CodeGen/X86/xop-builtins.c @@ -215,24 +215,28 @@ __m128i test_mm_rot_epi8(__m128i a, __m128i b) { // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_rot_epi8(a, b); } +TEST_CONSTEXPR(match_v16qi(_mm_rot_epi8((__m128i)(__v16qs){15, -14, -13, -12, 11, 10, 9, 8, 7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 15, -27, -4, -89, -80, 65, 36, 4, 7, 12, 65, -25, 48, -33, 4, 0)); __m128i test_mm_rot_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_rot_epi16 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_rot_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_rot_epi16((__m128i)(__v8hi){7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v8hi){0, 1, -2, 3, -4, 5, -6, 7}), 7, 12, 16385, -25, 12288, -33, 1024, 0)); __m128i test_mm_rot_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_rot_epi32 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_rot_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_rot_epi32((__m128i)(__v4si){3, -2, 1, 0}, (__m128i)(__v4si){0, 1, -2, 3}), 3, -3, 1073741824, 0)); __m128i test_mm_rot_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_rot_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_rot_epi64(a, b); } +TEST_CONSTEXPR(match_v2di(_mm_rot_epi64((__m128i)(__v2di){99, -55}, (__m128i)(__v2di){1, -2}), 198, 9223372036854775794LL)); __m128i test_mm_roti_epi8(__m128i a) { // CHECK-LABEL: test_mm_roti_epi8 diff --git a/clang/test/CodeGen/allow-ubsan-check.c b/clang/test/CodeGen/allow-ubsan-check.c index e225fb6..6de7676 100644 --- a/clang/test/CodeGen/allow-ubsan-check.c +++ b/clang/test/CodeGen/allow-ubsan-check.c @@ -51,7 
+51,7 @@ // TR-NEXT: [[TMP9:%.*]] = and i1 [[TMP5]], [[TMP8]], !nosanitize [[META2]] // TR-NEXT: br i1 [[TMP9]], label %[[CONT:.*]], label %[[TRAP:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] // TR: [[TRAP]]: -// TR-NEXT: tail call void @llvm.ubsantrap(i8 3) #[[ATTR5:[0-9]+]], !nosanitize [[META2]] +// TR-NEXT: tail call void @llvm.ubsantrap(i8 3) #[[ATTR7:[0-9]+]], !nosanitize [[META2]] // TR-NEXT: unreachable, !nosanitize [[META2]] // TR: [[CONT]]: // TR-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] @@ -102,14 +102,14 @@ int div(int x, int y) { // CHECK-NEXT: ret i32 [[TMP2]] // // TR-LABEL: define dso_local i32 @null( -// TR-SAME: ptr noundef readonly captures(address_is_null) [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// TR-SAME: ptr noundef readonly captures(address_is_null) [[X:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // TR-NEXT: [[ENTRY:.*:]] // TR-NEXT: [[TMP0:%.*]] = icmp eq ptr [[X]], null, !nosanitize [[META2]] // TR-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.allow.ubsan.check(i8 29), !nosanitize [[META2]] // TR-NEXT: [[DOTNOT1:%.*]] = and i1 [[TMP0]], [[TMP1]] // TR-NEXT: br i1 [[DOTNOT1]], label %[[TRAP:.*]], label %[[CONT:.*]], !prof [[PROF4:![0-9]+]], !nosanitize [[META2]] // TR: [[TRAP]]: -// TR-NEXT: tail call void @llvm.ubsantrap(i8 22) #[[ATTR5]], !nosanitize [[META2]] +// TR-NEXT: tail call void @llvm.ubsantrap(i8 22) #[[ATTR7]], !nosanitize [[META2]] // TR-NEXT: unreachable, !nosanitize [[META2]] // TR: [[CONT]]: // TR-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA5:![0-9]+]] @@ -161,7 +161,7 @@ int null(int* x) { // TR-NEXT: [[DOTDEMORGAN:%.*]] = and i1 [[TMP1]], [[TMP2]] // TR-NEXT: br i1 [[DOTDEMORGAN]], label %[[TRAP:.*]], label %[[CONT:.*]], !prof [[PROF4]], !nosanitize [[META2]] // TR: [[TRAP]]: -// TR-NEXT: tail call void @llvm.ubsantrap(i8 0) #[[ATTR5]], !nosanitize [[META2]] +// TR-NEXT: tail call void @llvm.ubsantrap(i8 0) #[[ATTR7]], !nosanitize [[META2]] // TR-NEXT: unreachable, !nosanitize [[META2]] // TR: 
[[CONT]]: // TR-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META2]] @@ -212,11 +212,11 @@ void use(double*); // CHECK-NEXT: unreachable, !nosanitize [[META2]] // // TR-LABEL: define dso_local double @lbounds( -// TR-SAME: i32 noundef [[B:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR0]] { +// TR-SAME: i32 noundef [[B:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { // TR-NEXT: [[ENTRY:.*:]] // TR-NEXT: [[TMP0:%.*]] = zext i32 [[B]] to i64 // TR-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP0]], align 16 -// TR-NEXT: call void @use(ptr noundef nonnull [[VLA]]) #[[ATTR6:[0-9]+]] +// TR-NEXT: call void @use(ptr noundef nonnull [[VLA]]) #[[ATTR8:[0-9]+]] // TR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // TR-NEXT: [[TMP1:%.*]] = icmp ule i64 [[TMP0]], [[IDXPROM]] // TR-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 71), !nosanitize [[META2]] @@ -227,7 +227,7 @@ void use(double*); // TR-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA9:![0-9]+]] // TR-NEXT: ret double [[TMP5]] // TR: [[TRAP]]: -// TR-NEXT: call void @llvm.ubsantrap(i8 71) #[[ATTR5]], !nosanitize [[META2]] +// TR-NEXT: call void @llvm.ubsantrap(i8 71) #[[ATTR7]], !nosanitize [[META2]] // TR-NEXT: unreachable, !nosanitize [[META2]] // // REC-LABEL: define dso_local double @lbounds( diff --git a/clang/test/CodeGen/target-builtin-error-3.c b/clang/test/CodeGen/target-builtin-error-3.c index 3de76e2..056dc94 100644 --- a/clang/test/CodeGen/target-builtin-error-3.c +++ b/clang/test/CodeGen/target-builtin-error-3.c @@ -24,6 +24,26 @@ static inline half16 __attribute__((__overloadable__)) convert_half( float16 a ) } void avx_test( uint16_t *destData, float16 argbF) { - // expected-warning@+1{{AVX vector argument of type 'float16' (vector of 16 'float' values) without 'avx512f' enabled changes the ABI}} ((half16U *)destData)[0] = convert_half(argbF); } + +half16 test( float16 a ) { + half16 r; + r.lo = 
convert_half(a.lo); + return r; +} +void avx_test2( uint16_t *destData, float16 argbF) +{ + // expected-warning@+1{{AVX vector argument of type 'float16' (vector of 16 'float' values) without 'avx512f' enabled changes the ABI}} + ((half16U *)destData)[0] = test(argbF); +} + +__attribute__((always_inline)) half16 test2( float16 a ) { + half16 r; + r.lo = convert_half(a.lo); + return r; +} +void avx_test3( uint16_t *destData, float16 argbF) +{ + ((half16U *)destData)[0] = test2(argbF); +} diff --git a/clang/test/CodeGen/union-tbaa1.c b/clang/test/CodeGen/union-tbaa1.c index 9f2b0e9..3f6ada5 100644 --- a/clang/test/CodeGen/union-tbaa1.c +++ b/clang/test/CodeGen/union-tbaa1.c @@ -19,16 +19,16 @@ void bar(vect32 p[][2]); // CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP1]], [[NUM]] // CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP0]] // CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA6:![0-9]+]] -// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARR]], i32 [[TMP0]], i32 1 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i32 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !tbaa [[TBAA2]] // CHECK-NEXT: [[MUL6:%.*]] = mul i32 [[TMP2]], [[NUM]] -// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP0]], i32 1 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i32 4 // CHECK-NEXT: store i32 [[MUL6]], ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[MUL]], 16 // CHECK-NEXT: store i32 [[TMP3]], ptr [[VEC]], align 4, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP4]], i32 1 -// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX14]], i32 
2 +// CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP4]] +// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX13]], i32 6 // CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2, !tbaa [[TBAA6]] // CHECK-NEXT: [[CONV16:%.*]] = zext i16 [[TMP5]] to i32 // CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i32 4 diff --git a/clang/test/CodeGenHLSL/RootSignature-Target.hlsl b/clang/test/CodeGenHLSL/RootSignature-Target.hlsl new file mode 100644 index 0000000..50e6bae --- /dev/null +++ b/clang/test/CodeGenHLSL/RootSignature-Target.hlsl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-rootsignature \ +// RUN: -hlsl-entry EntryRS -emit-llvm -o - %s | FileCheck %s + +// CHECK: !dx.rootsignatures = !{![[#ENTRY:]]} +// CHECK: ![[#ENTRY]] = !{null, ![[#ENTRY_RS:]], i32 2} +// CHECK: ![[#ENTRY_RS]] = !{![[#ROOT_CBV:]]} +// CHECK: ![[#ROOT_CBV]] = !{!"RootCBV", i32 0, i32 0, i32 0, i32 4} + +#define EntryRS "CBV(b0)" diff --git a/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl b/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl index 7956e40..92dba21 100644 --- a/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl +++ b/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl @@ -1,9 +1,6 @@ // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \ // RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s -// https://github.com/llvm/llvm-project/issues/156786 -// XFAIL: * - // This test verifies handling of multi-dimensional local arrays of resources // when used as a function argument and local variable. 
diff --git a/clang/test/CodeGenHLSL/semantics/DispatchThreadID-noindex.hlsl b/clang/test/CodeGenHLSL/semantics/DispatchThreadID-noindex.hlsl new file mode 100644 index 0000000..9ed5457 --- /dev/null +++ b/clang/test/CodeGenHLSL/semantics/DispatchThreadID-noindex.hlsl @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s -verify -verify-ignore-unexpected=note,error +// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s -verify -verify-ignore-unexpected=note,error + +[shader("compute")] +[numthreads(8,8,1)] +void foo(uint Idx : SV_DispatchThreadID1) { + // expected-error@-1 {{semantic SV_DispatchThreadID does not allow indexing}} +} diff --git a/clang/test/CodeGenHLSL/semantics/SV_GroupID-noindex.hlsl b/clang/test/CodeGenHLSL/semantics/SV_GroupID-noindex.hlsl new file mode 100644 index 0000000..8fa0b07 --- /dev/null +++ b/clang/test/CodeGenHLSL/semantics/SV_GroupID-noindex.hlsl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s -verify -verify-ignore-unexpected=note,error +// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s -verify -verify-ignore-unexpected=note,error + +[shader("compute")] +[numthreads(8,8,1)] +void foo(uint Idx : SV_GroupID1) { + // expected-error@-1 {{semantic SV_GroupID does not allow indexing}} +} + diff --git a/clang/test/CodeGenHLSL/semantics/SV_GroupThreadID-noindex.hlsl b/clang/test/CodeGenHLSL/semantics/SV_GroupThreadID-noindex.hlsl new file mode 100644 index 0000000..da72e85 --- /dev/null +++ b/clang/test/CodeGenHLSL/semantics/SV_GroupThreadID-noindex.hlsl @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s 
-verify -verify-ignore-unexpected=note,error +// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s -verify -verify-ignore-unexpected=note,error + +[shader("compute")] +[numthreads(8,8,1)] +void foo(uint Idx : SV_GroupThreadID1) { + // expected-error@-1 {{semantic SV_GroupThreadID does not allow indexing}} +} diff --git a/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl b/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl index bdba38e..1bba87e 100644 --- a/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl +++ b/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-pixel -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s -// CHECK: @sv_position = external hidden thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations !0 +// CHECK: @SV_Position = external hidden thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations !0 // CHECK: define void @main() {{.*}} { float4 main(float4 p : SV_Position) { - // CHECK: %[[#P:]] = load <4 x float>, ptr addrspace(7) @sv_position, align 16 + // CHECK: %[[#P:]] = load <4 x float>, ptr addrspace(7) @SV_Position, align 16 // CHECK: %[[#R:]] = call spir_func <4 x float> @_Z4mainDv4_f(<4 x float> %[[#P]]) return p; } diff --git a/clang/test/CodeGenHLSL/semantics/missing.hlsl b/clang/test/CodeGenHLSL/semantics/missing.hlsl new file mode 100644 index 0000000..3ba725e --- /dev/null +++ b/clang/test/CodeGenHLSL/semantics/missing.hlsl @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - -hlsl-entry main %s -verify -verify-ignore-unexpected=note +// RUN: %clang_cc1 -triple spirv-unknown-vulkan-compute -x hlsl -emit-llvm -disable-llvm-passes -o - -hlsl-entry main %s -verify -verify-ignore-unexpected=note + +[numthreads(1,1,1)] 
+void main(unsigned GI) { + // expected-error@-1 {{semantic annotations must be present for all parameters of an entry function or patch constant function}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 23af19d..c357159 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -1064,6 +1064,174 @@ void test_sat_pk4_i4_i8(ushort *out, uint src) *out = __builtin_amdgcn_sat_pk4_u4_u8(src); } +// CHECK-LABEL: @test_get_cluster_id( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4 +// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ +// CHECK-NEXT: i32 0, label [[SW_BB:%.*]] +// CHECK-NEXT: i32 1, label [[SW_BB1:%.*]] +// CHECK-NEXT: i32 2, label [[SW_BB2:%.*]] +// CHECK-NEXT: ] +// CHECK: sw.bb: +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.cluster.id.x() +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG:%.*]] +// CHECK: sw.bb1: +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cluster.id.y() +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG]] +// CHECK: sw.bb2: +// CHECK-NEXT: [[TMP5:%.*]] = call 
i32 @llvm.amdgcn.cluster.id.z() +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG]] +// CHECK: sw.default: +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP7]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG]] +// CHECK: sw.epilog: +// CHECK-NEXT: ret void +// +void test_get_cluster_id(int d, global int *out) +{ + switch (d) { + case 0: *out = __builtin_amdgcn_cluster_id_x(); break; + case 1: *out = __builtin_amdgcn_cluster_id_y(); break; + case 2: *out = __builtin_amdgcn_cluster_id_z(); break; + default: *out = 0; + } +} + +// CHECK-LABEL: @test_get_cluster_group_id( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4 +// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ +// CHECK-NEXT: i32 0, label [[SW_BB:%.*]] +// CHECK-NEXT: i32 1, label [[SW_BB1:%.*]] +// CHECK-NEXT: i32 2, label [[SW_BB2:%.*]] +// CHECK-NEXT: ] +// CHECK: sw.bb: +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.id.x() +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG:%.*]] +// CHECK: sw.bb1: +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.id.y() +// 
CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG]] +// CHECK: sw.bb2: +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.id.z() +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG]] +// CHECK: sw.default: +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP7]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG]] +// CHECK: sw.epilog: +// CHECK-NEXT: ret void +// +void test_get_cluster_group_id(int d, global int *out) +{ + switch (d) { + case 0: *out = __builtin_amdgcn_cluster_workgroup_id_x(); break; + case 1: *out = __builtin_amdgcn_cluster_workgroup_id_y(); break; + case 2: *out = __builtin_amdgcn_cluster_workgroup_id_z(); break; + default: *out = 0; + } +} + +// CHECK-LABEL: @test_cluster_workgroup_flat_id( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.flat.id() +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[TMP1]], align 4 +// CHECK-NEXT: ret void +// +void test_cluster_workgroup_flat_id(global uint *out) +{ + *out = __builtin_amdgcn_cluster_workgroup_flat_id(); +} + +// CHECK-LABEL: @test_get_cluster_workgroups_max_id( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5) +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4 +// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ +// CHECK-NEXT: i32 0, label [[SW_BB:%.*]] +// CHECK-NEXT: i32 1, label [[SW_BB1:%.*]] +// CHECK-NEXT: i32 2, label [[SW_BB2:%.*]] +// CHECK-NEXT: ] +// CHECK: sw.bb: +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x() +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG:%.*]] +// CHECK: sw.bb1: +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y() +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG]] +// CHECK: sw.bb2: +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z() +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG]] +// CHECK: sw.default: +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP7]], align 4 +// CHECK-NEXT: br label [[SW_EPILOG]] +// CHECK: sw.epilog: +// CHECK-NEXT: ret void +// +void test_get_cluster_workgroups_max_id(int d, global int *out) +{ + switch (d) { + case 0: *out = __builtin_amdgcn_cluster_workgroup_max_id_x(); break; + case 1: *out = 
__builtin_amdgcn_cluster_workgroup_max_id_y(); break; + case 2: *out = __builtin_amdgcn_cluster_workgroup_max_id_z(); break; + default: *out = 0; + } +} + +// CHECK-LABEL: @test_get_cluster_workgroup_max_flat_id( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.flat.id() +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[TMP1]], align 4 +// CHECK-NEXT: ret void +// +void test_get_cluster_workgroup_max_flat_id(global int *out) +{ + *out = __builtin_amdgcn_cluster_workgroup_max_flat_id(); +} + // CHECK-LABEL: @test_permlane16_swap( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index bf022bc..039d032 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -398,6 +398,384 @@ void test_s_sendmsghalt_var(int in) __builtin_amdgcn_s_sendmsghalt(1, in); } +// CHECK-LABEL: @test_wave_reduce_add_u32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32( +void test_wave_reduce_add_u32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_add_u32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_add_u64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.add.i64( +void test_wave_reduce_add_u64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_add_u64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_add_u32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32( +void 
test_wave_reduce_add_u32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_add_u32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_add_u64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.add.i64( +void test_wave_reduce_add_u64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_add_u64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_add_u32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32( +void test_wave_reduce_add_u32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_add_u32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_add_u64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.add.i64( +void test_wave_reduce_add_u64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_add_u64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_sub_u32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32( +void test_wave_reduce_sub_u32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_u32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_sub_u64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.sub.i64( +void test_wave_reduce_sub_u64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_u64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_sub_u32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32( +void test_wave_reduce_sub_u32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_u32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_sub_u64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.sub.i64( +void test_wave_reduce_sub_u64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_u64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_sub_u32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32( +void test_wave_reduce_sub_u32_dpp(global int* out, int in) +{ 
+ *out = __builtin_amdgcn_wave_reduce_sub_u32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_sub_u64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.sub.i64( +void test_wave_reduce_sub_u64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_u64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_and_b32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32( +void test_wave_reduce_and_b32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_and_b64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.and.i64( +void test_wave_reduce_and_b64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_and_b32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32( +void test_wave_reduce_and_b32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_and_b64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.and.i64( +void test_wave_reduce_and_b64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_and_b32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32( +void test_wave_reduce_and_b32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_and_b64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.and.i64( +void test_wave_reduce_and_b64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_or_b32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.or.i32( +void test_wave_reduce_or_b32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b32(in, 0); +} + +// 
CHECK-LABEL: @test_wave_reduce_or_b64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.or.i64( +void test_wave_reduce_or_b64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_or_b32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.or.i32( +void test_wave_reduce_or_b32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_or_b64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.or.i64( +void test_wave_reduce_or_b64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_or_b32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.or.i32( +void test_wave_reduce_or_b32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_or_b64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.or.i64( +void test_wave_reduce_or_b64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.xor.i32( +void test_wave_reduce_xor_b32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.xor.i64( +void test_wave_reduce_xor_b64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.xor.i32( +void test_wave_reduce_xor_b32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b64_iterative +// CHECK: 
{{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.xor.i64( +void test_wave_reduce_xor_b64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.xor.i32( +void test_wave_reduce_xor_b32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.xor.i64( +void test_wave_reduce_xor_b64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32( +void test_wave_reduce_min_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.min.i64( +void test_wave_reduce_min_i64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32( +void test_wave_reduce_min_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.min.i64( +void test_wave_reduce_min_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32( +void test_wave_reduce_min_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.min.i64( +void 
test_wave_reduce_min_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_u32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32( +void test_wave_reduce_min_u32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_u64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umin.i64( +void test_wave_reduce_min_u64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_u32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32( +void test_wave_reduce_min_u32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_u64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umin.i64( +void test_wave_reduce_min_u64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_u32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32( +void test_wave_reduce_min_u32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_u64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umin.i64( +void test_wave_reduce_min_u64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32( +void test_wave_reduce_max_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.max.i64( +void test_wave_reduce_max_i64_default(global int* out, long 
in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32( +void test_wave_reduce_max_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.max.i64( +void test_wave_reduce_max_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32( +void test_wave_reduce_max_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.max.i64( +void test_wave_reduce_max_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_u32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32( +void test_wave_reduce_max_u32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_u64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umax.i64( +void test_wave_reduce_max_u64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_u32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32( +void test_wave_reduce_max_u32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_u64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umax.i64( +void test_wave_reduce_max_u64_iterative(global int* out, long in) +{ + *out = 
__builtin_amdgcn_wave_reduce_max_u64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_u32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32( +void test_wave_reduce_max_u32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_u64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umax.i64( +void test_wave_reduce_max_u64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u64(in, 2); +} + // CHECK-LABEL: @test_s_barrier // CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.barrier( void test_s_barrier() diff --git a/clang/test/CodeGenSPIRV/spirv-intel.c b/clang/test/CodeGenSPIRV/spirv-intel.c index 3cfe09f..997cd6f 100644 --- a/clang/test/CodeGenSPIRV/spirv-intel.c +++ b/clang/test/CodeGenSPIRV/spirv-intel.c @@ -1,9 +1,11 @@ -// RUN: %clang_cc1 -triple spirv64-intel %s -emit-llvm -o - | FileCheck -check-prefix=CHECK-WITH %s -// RUN: %clang_cc1 -triple spirv32-intel %s -emit-llvm -o - | FileCheck -check-prefix=CHECK-WITH %s +// RUN: %clang_cc1 -triple spirv64-intel %s -emit-llvm -o - | FileCheck -check-prefix=CHECK-WITH-64 %s +// RUN: %clang_cc1 -triple spirv32-intel %s -emit-llvm -o - | FileCheck -check-prefix=CHECK-WITH-32 %s // RUN: %clang_cc1 -triple spir-intel %s -emit-llvm -o - | FileCheck -check-prefix=CHECK-WITHOUT %s // RUN: %clang_cc1 -triple spir64-intel %s -emit-llvm -o - | FileCheck -check-prefix=CHECK-WITHOUT %s -// CHECK-WITH: spir_func void @foo(ptr addrspace(4) noundef %param) #0 { +// CHECK-WITH-64: spir_func void @foo(ptr addrspace(4) noundef %param) addrspace(9) #0 { +// CHECK-WITH-32: spir_func void @foo(ptr addrspace(4) noundef %param) #0 { + // CHECK-WITHOUT: spir_func void @foo(ptr noundef %param) #0 { void foo(int *param) { } diff --git a/clang/test/DebugInfo/CXX/artificial-arg.cpp b/clang/test/DebugInfo/CXX/artificial-arg.cpp index a0cf131..21b8d04 100644 --- a/clang/test/DebugInfo/CXX/artificial-arg.cpp +++ 
b/clang/test/DebugInfo/CXX/artificial-arg.cpp @@ -25,7 +25,8 @@ int main(int argc, char **argv) { // CHECK: ![[CLASSTYPE:.*]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "A", // CHECK-SAME: identifier: "_ZTS1A" // CHECK: ![[ARTARG:.*]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[CLASSTYPE]],{{.*}} DIFlagArtificial -// CHECK: !DISubprogram(name: "A", scope: ![[CLASSTYPE]] +// CHECK: !DISubprogram(name: "A" +// CHECK-SAME: scope: ![[CLASSTYPE]] // CHECK-SAME: line: 12 // CHECK-SAME: DIFlagPublic // CHECK: !DISubroutineType(types: [[FUNCTYPE:![0-9]*]]) diff --git a/clang/test/DebugInfo/CXX/local-structor-linkage-names.cpp b/clang/test/DebugInfo/CXX/local-structor-linkage-names.cpp new file mode 100644 index 0000000..4b4261e --- /dev/null +++ b/clang/test/DebugInfo/CXX/local-structor-linkage-names.cpp @@ -0,0 +1,29 @@ +// Tests that we emit don't emit unified constructor/destructor linkage names +// for function-local constructors. + +// Check with -gstructor-decl-linkage-names. +// RUN: %clang_cc1 -triple aarch64-apple-macosx -emit-llvm -debug-info-kind=standalone \ +// RUN: -gstructor-decl-linkage-names %s -o - | FileCheck %s --check-prefixes=CHECK +// +// Check with -gno-structor-decl-linkage-names. 
+// RUN: %clang_cc1 -triple aarch64-apple-macosx -emit-llvm -debug-info-kind=standalone \ +// RUN: -gno-structor-decl-linkage-names %s -o - | FileCheck %s --check-prefixes=CHECK + +struct HasNestedCtor { + HasNestedCtor(); +}; + +HasNestedCtor::HasNestedCtor() { + struct Local { + Local() {} + ~Local() {} + } l; +} + +// CHECK: !DISubprogram(name: "Local" +// CHECK-NOT: linkageName +// CHECK-SAME: ) + +// CHECK: !DISubprogram(name: "~Local" +// CHECK-NOT: linkageName +// CHECK-SAME: ) diff --git a/clang/test/DebugInfo/CXX/structor-linkage-names.cpp b/clang/test/DebugInfo/CXX/structor-linkage-names.cpp new file mode 100644 index 0000000..b7aac19 --- /dev/null +++ b/clang/test/DebugInfo/CXX/structor-linkage-names.cpp @@ -0,0 +1,89 @@ +// Tests that we emit unified constructor/destructor linkage names +// for ABIs that support it. + +// Check that -gstructor-decl-linkage-names is the default. +// RUN: %clang_cc1 -triple aarch64-apple-macosx -emit-llvm -debug-info-kind=standalone \ +// RUN: %s -o - | FileCheck %s --check-prefixes=CHECK,ITANIUM +// +// Check with -gstructor-decl-linkage-names. +// RUN: %clang_cc1 -triple aarch64-apple-macosx -emit-llvm -debug-info-kind=standalone \ +// RUN: -gstructor-decl-linkage-names %s -o - | FileCheck %s --check-prefixes=CHECK,ITANIUM +// +// Check with -gno-structor-decl-linkage-names. +// RUN: %clang_cc1 -triple aarch64-apple-macosx -emit-llvm -debug-info-kind=standalone \ +// RUN: -gno-structor-decl-linkage-names %s -o - | FileCheck %s --check-prefixes=CHECK,DISABLE +// +// Check ABI without structor variants. +// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -debug-info-kind=standalone \ +// RUN: -gstructor-decl-linkage-names %s -o - | FileCheck %s --check-prefixes=CHECK,MSABI + +struct Base { + Base(int x); + ~Base(); +}; + +Base::Base(int x) {} +Base::~Base() {} + +// Check that we emit unified ctor/dtor (C4/D4) on Itanium but not for MS-ABI. 
+ +// CHECK: ![[BASE_CTOR_DECL:[0-9]+]] = !DISubprogram(name: "Base" +// MSABI-NOT: linkageName: +// DISABLE-NOT: linkageName: +// ITANIUM-SAME: linkageName: "_ZN4BaseC4Ei" +// CHECK-SAME: spFlags: 0 + +// CHECK: ![[BASE_DTOR_DECL:[0-9]+]] = !DISubprogram(name: "~Base" +// MSABI-NOT: linkageName: +// DISABLE-NOT: linkageName: +// ITANIUM-SAME: linkageName: "_ZN4BaseD4Ev" +// CHECK-SAME: spFlags: 0 + +// Check that the ctor/dtor definitions have linkage names that aren't +// the ones on the declaration. + +// CHECK: !DISubprogram(name: "Base" +// MSABI-SAME: linkageName: +// ITANIUM-SAME: linkageName: "_ZN4BaseC2Ei" +// CHECK-SAME: spFlags: DISPFlagDefinition +// CHECK-SAME: declaration: ![[BASE_CTOR_DECL]] + +// ITANIUM: !DISubprogram(name: "Base" +// ITANIUM-SAME: linkageName: "_ZN4BaseC1Ei" +// ITANIUM-SAME: spFlags: DISPFlagDefinition +// ITANIUM-SAME: declaration: ![[BASE_CTOR_DECL]] + +// CHECK: !DISubprogram(name: "~Base" +// MSABI-SAME: linkageName: +// ITANIUM-SAME: linkageName: "_ZN4BaseD2Ev" +// CHECK-SAME: spFlags: DISPFlagDefinition +// CHECK-SAME: declaration: ![[BASE_DTOR_DECL]] + +// ITANIUM: !DISubprogram(name: "~Base" +// ITANIUM-SAME: linkageName: "_ZN4BaseD1Ev" +// ITANIUM-SAME: spFlags: DISPFlagDefinition +// ITANIUM-SAME: declaration: ![[BASE_DTOR_DECL]] + +struct Derived : public Base { + using Base::Base; +} d(5); + +// CHECK: !DISubprogram(name: "Base" +// MSABI-SAME: linkageName: +// ITANIUM-SAME: linkageName: "_ZN7DerivedCI14BaseEi" +// CHECK-SAME: spFlags: {{.*}}DISPFlagDefinition +// CHECK-SAME: declaration: ![[BASE_INHERIT_CTOR_DECL:[0-9]+]] + +// CHECK: [[BASE_INHERIT_CTOR_DECL]] = !DISubprogram(name: "Base" +// MSABI-NOT: linkageName: +// DISABLE-NOT: linkageName: +// ITANIUM-SAME: linkageName: "_ZN7DerivedCI44BaseEi" +// CHECK-SAME spFlags: 0 + +// ITANIUM: !DISubprogram(name: "Base" +// ITANIUM-SAME: linkageName: "_ZN7DerivedCI24BaseEi" +// ITANIUM-SAME: spFlags: DISPFlagDefinition +// ITANIUM-SAME: declaration: 
![[BASE_INHERIT_CTOR_DECL:[0-9]+]] + +// MSABI: !DISubprogram(name: "~Derived" +// DISABLE: !DISubprogram(name: "~Derived" diff --git a/clang/test/DebugInfo/ObjCXX/cyclic.mm b/clang/test/DebugInfo/ObjCXX/cyclic.mm index 2fb1611..a062b6a 100644 --- a/clang/test/DebugInfo/ObjCXX/cyclic.mm +++ b/clang/test/DebugInfo/ObjCXX/cyclic.mm @@ -10,8 +10,9 @@ struct B { // CHECK-SAME: identifier: // CHECK: ![[BMEMBERS]] = !{![[BB:[0-9]+]]} B(struct A *); -// CHECK: ![[BB]] = !DISubprogram(name: "B", scope: ![[B]] -// CHECK-SAME: line: [[@LINE-2]], +// CHECK: ![[BB]] = !DISubprogram(name: "B", +// CHECK-SAME: scope: ![[B]] +// CHECK-SAME: line: [[@LINE-3]], // CHECK-SAME: type: ![[TY:[0-9]+]], // CHECK: ![[TY]] = !DISubroutineType(types: ![[ARGS:[0-9]+]]) // CHECK: ![[ARGS]] = !{null, ![[THIS:[0-9]+]], !{{[^,]+}}} diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index e970868..2605076 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -54,11 +54,13 @@ // fpfast: -funsafe-math-optimizations // fpfast: -ffast-math -// RUN: %clang_cl /fp:fast /fp:precise -### -- %s 2>&1 | FileCheck -check-prefix=fpprecise %s +// RUN: %clang_cl /fp:fast /fp:precise -Wno-overriding-complex-range -### -- %s 2>&1 | \ +// RUN: FileCheck -check-prefix=fpprecise %s // fpprecise-NOT: -funsafe-math-optimizations // fpprecise-NOT: -ffast-math -// RUN: %clang_cl /fp:fast /fp:strict -### -- %s 2>&1 | FileCheck -check-prefix=fpstrict %s +// RUN: %clang_cl /fp:fast /fp:strict -Wno-overriding-complex-range -### -- %s 2>&1 | \ +// RUN: FileCheck -check-prefix=fpstrict %s // fpstrict-NOT: -funsafe-math-optimizations // fpstrict-NOT: -ffast-math // fpstrict: -ffp-contract=off diff --git a/clang/test/Driver/dxc_frs.hlsl b/clang/test/Driver/dxc_frs.hlsl new file mode 100644 index 0000000..767cab6 --- /dev/null +++ b/clang/test/Driver/dxc_frs.hlsl @@ -0,0 +1,10 @@ +// RUN: %clang_dxc -T cs_6_0 /Fo %t.dxo /Frs %t.rs.dxo -### %s 2>&1 | FileCheck %s + +// Test 
to demonstrate extracting the root signature to the specified +// output file with /Frs. + +// CHECK: "{{.*}}llvm-objcopy{{(.exe)?}}" "{{.*}}.obj" "{{.*}}.dxo" "--extract-section=RTS0={{.*}}.rs.dxo" + +[shader("compute"), RootSignature("")] +[numthreads(1,1,1)] +void EmptyEntry() {} diff --git a/clang/test/Driver/dxc_rootsignature_target.hlsl b/clang/test/Driver/dxc_rootsignature_target.hlsl new file mode 100644 index 0000000..08cd1ab --- /dev/null +++ b/clang/test/Driver/dxc_rootsignature_target.hlsl @@ -0,0 +1,8 @@ +// RUN: %clang_dxc -E EntryRS -T rootsig_1_1 /Fo %t.dxo -### %s 2>&1 | FileCheck %s --check-prefix=CMDS + +// CMDS: "{{.*}}clang{{.*}}" "-cc1" +// CMDS-SAME: "-triple" "dxilv1.1-unknown-shadermodel1.1-rootsignature" +// CMDS-SAME: "-hlsl-entry" "EntryRS" +// CMDS: "{{.*}}llvm-objcopy{{(.exe)?}}" "{{.*}}.dxo" "--only-section=RTS0" + +#define EntryRS "UAV(u0)" diff --git a/clang/test/Driver/fp-model.c b/clang/test/Driver/fp-model.c index 6f17d4a..9cf5d0c 100644 --- a/clang/test/Driver/fp-model.c +++ b/clang/test/Driver/fp-model.c @@ -81,8 +81,7 @@ // WARN12: warning: overriding '-ffp-model=strict' option with '-Ofast' // RUN: %clang -### -ffast-math -ffp-model=strict -c %s 2>&1 | FileCheck \ -// RUN: --check-prefix=WARN-CX-BASIC-TO-FULL %s -// WARN-CX-BASIC-TO-FULL: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=full' +// RUN: --check-prefix=CHECK-FASTMATH-FPM-STRICT %s // RUN: %clang -### -ffp-model=strict -fapprox-func -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN13 %s @@ -205,7 +204,7 @@ // RUN: %clang -### -nostdinc -ffast-math -ffp-model=fast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FASTMATH-FPM-FAST %s -// CHECK-FASTMATH-FPM-FAST: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=promoted' +// CHECK-FASTMATH-FPM-FAST: warning: '-ffp-model=fast' sets complex range to "promoted" overriding the setting of "basic" that was implied by '-ffast-math' // 
CHECK-FASTMATH-FPM-FAST: "-cc1" // CHECK-FASTMATH-FPM-FAST-NOT: "-menable-no-infs" // CHECK-FASTMATH-FPM-FAST-NOT: "-menable-no-nans" @@ -221,7 +220,8 @@ // CHECK-FASTMATH-FPM-FAST-SAME: "-complex-range=promoted" // RUN: %clang -### -nostdinc -ffast-math -ffp-model=precise -c %s 2>&1 \ -// RUN: | FileCheck --check-prefixes=CHECK-FASTMATH-FPM-PRECISE,WARN-CX-BASIC-TO-FULL %s +// RUN: | FileCheck --check-prefixes=CHECK-FASTMATH-FPM-PRECISE %s +// CHECK-FASTMATH-FPM-PRECISE: warning: '-ffp-model=precise' sets complex range to "full" overriding the setting of "basic" that was implied by '-ffast-math' // CHECK-FASTMATH-FPM-PRECISE: "-cc1" // CHECK-FASTMATH-FPM-PRECISE-NOT: "-menable-no-infs" // CHECK-FASTMATH-FPM-PRECISE-NOT: "-menable-no-nans" @@ -237,7 +237,8 @@ // CHECK-FASTMATH-FPM-PRECISE-SAME: "-complex-range=full" // RUN: %clang -### -nostdinc -ffast-math -ffp-model=strict -c %s 2>&1 \ -// RUN: | FileCheck --check-prefixes=CHECK-FASTMATH-FPM-STRICT,WARN-CX-BASIC-TO-FULL %s +// RUN: | FileCheck --check-prefixes=CHECK-FASTMATH-FPM-STRICT %s +// CHECK-FASTMATH-FPM-STRICT: warning: '-ffp-model=strict' sets complex range to "full" overriding the setting of "basic" that was implied by '-ffast-math' // CHECK-FASTMATH-FPM-STRICT: "-cc1" // CHECK-FASTMATH-FPM-STRICT-NOT: "-menable-no-infs" // CHECK-FASTMATH-FPM-STRICT-NOT: "-menable-no-nans" diff --git a/clang/test/Driver/fpatchable-function-entry.c b/clang/test/Driver/fpatchable-function-entry.c index 43be6c5..5248a7c0 100644 --- a/clang/test/Driver/fpatchable-function-entry.c +++ b/clang/test/Driver/fpatchable-function-entry.c @@ -8,6 +8,7 @@ // RUN: %clang --target=riscv64 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s // RUN: %clang --target=powerpc-unknown-linux-gnu %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s // RUN: %clang --target=powerpc64-unknown-linux-gnu %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s +// RUN: %clang --target=powerpc64le-unknown-linux-gnu %s 
-fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s // CHECK: "-fpatchable-function-entry=1" // RUN: %clang --target=aarch64 -fsyntax-only %s -fpatchable-function-entry=1,1 -c -### 2>&1 | FileCheck --check-prefix=11 %s diff --git a/clang/test/Driver/frame-pointer-elim.c b/clang/test/Driver/frame-pointer-elim.c index 6e21671..6d71982 100644 --- a/clang/test/Driver/frame-pointer-elim.c +++ b/clang/test/Driver/frame-pointer-elim.c @@ -73,12 +73,12 @@ // RUN: %clang -### -target armv7s-apple-ios -fomit-frame-pointer %s 2>&1 | \ // RUN: FileCheck --check-prefix=WARN-OMIT-7S %s // WARN-OMIT-7S: warning: optimization flag '-fomit-frame-pointer' is not supported for target 'armv7s' -// WARN-OMIT-7S: "-mframe-pointer=all" +// WARN-OMIT-7S: "-mframe-pointer=non-leaf" // RUN: %clang -### -target armv7k-apple-watchos -fomit-frame-pointer %s 2>&1 | \ // RUN: FileCheck --check-prefix=WARN-OMIT-7K %s // WARN-OMIT-7K: warning: optimization flag '-fomit-frame-pointer' is not supported for target 'armv7k' -// WARN-OMIT-7K: "-mframe-pointer=all" +// WARN-OMIT-7K: "-mframe-pointer=non-leaf" // RUN: %clang -### -target armv7s-apple-ios8.0 -momit-leaf-frame-pointer %s 2>&1 | \ // RUN: FileCheck --check-prefix=WARN-OMIT-LEAF-7S %s @@ -190,22 +190,34 @@ // RUN: FileCheck --check-prefix=KEEP-NONE %s // Check that for Apple bare metal targets, we're keeping frame pointers by default -// RUN: %clang -### --target=thumbv6m-apple-none-macho -S %s 2>&1 | \ -// RUN: FileCheck --check-prefix=KEEP-ALL %s -// RUN: %clang -### --target=thumbv6m-apple-none-macho -S -fno-omit-frame-pointer %s 2>&1 | \ -// RUN: FileCheck --check-prefix=KEEP-ALL %s +// RUN: %clang -### --target=armv6m-apple-none-macho -S %s 2>&1 | \ +// RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s +// RUN: %clang -### --target=armv6m-apple-none-macho -S -fno-omit-frame-pointer %s 2>&1 | \ +// RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s // RUN: %clang -### --target=arm-apple-none-macho -S %s 2>&1 | \ -// RUN: FileCheck 
--check-prefix=KEEP-ALL %s +// RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s // RUN: %clang -### --target=arm-apple-none-macho -S -fno-omit-frame-pointer %s 2>&1 | \ -// RUN: FileCheck --check-prefix=KEEP-ALL %s -// RUN: %clang -### --target=thumbv6m-apple-none-macho -S -O1 %s 2>&1 | \ -// RUN: FileCheck --check-prefix=KEEP-ALL %s -// RUN: %clang -### --target=thumbv6m-apple-none-macho -S -O1 -fno-omit-frame-pointer %s 2>&1 | \ -// RUN: FileCheck --check-prefix=KEEP-ALL %s +// RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s +// RUN: %clang -### --target=armv6m-apple-none-macho -S -O1 %s 2>&1 | \ +// RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s +// RUN: %clang -### --target=armv6m-apple-none-macho -S -O1 -fno-omit-frame-pointer %s 2>&1 | \ +// RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s // RUN: %clang -### --target=arm-apple-none-macho -S -O1 %s 2>&1 | \ -// RUN: FileCheck --check-prefix=KEEP-ALL %s +// RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s // RUN: %clang -### --target=arm-apple-none-macho -S -O1 -fno-omit-frame-pointer %s 2>&1 | \ -// RUN: FileCheck --check-prefix=KEEP-ALL %s +// RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s + +// RUN: %clang --target=armv7-apple-macho -### -S %s 2>&1 \ +// RUN: -fomit-frame-pointer \ +// RUN: | FileCheck -check-prefix=KEEP-NONE %s + +// RUN: %clang --target=armv7-apple-macho -### -S %s 2>&1 \ +// RUN: -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer \ +// RUN: | FileCheck -check-prefix=KEEP-ALL %s + +// RUN: %clang --target=armv7-apple-macho -### -S %s 2>&1 \ +// RUN: -fomit-frame-pointer -mno-omit-leaf-frame-pointer \ +// RUN: | FileCheck -check-prefix=KEEP-NONE %s // AArch64 bare metal targets behave like hosted targets // RUN: %clang -### --target=aarch64-none-elf -S %s 2>&1 | \ diff --git a/clang/test/Driver/frame-pointer.c b/clang/test/Driver/frame-pointer.c index 2015fa5..2259246 100644 --- a/clang/test/Driver/frame-pointer.c +++ b/clang/test/Driver/frame-pointer.c @@ -80,12 +80,16 @@ // RUN: %clang 
--target=loongarch64 -### -S -O3 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK3-64 %s // RUN: %clang --target=loongarch64 -### -S -Os %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECKs-64 %s +// RUN: %clang --target=armv7-apple-macho -### -S %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK-MACHO-32 %s + // CHECK0-32: -mframe-pointer=all // CHECK1-32-NOT: -mframe-pointer=all // CHECK2-32-NOT: -mframe-pointer=all // CHECK3-32-NOT: -mframe-pointer=all // CHECKs-32-NOT: -mframe-pointer=all +// CHECK-MACHO-32: -mframe-pointer=non-leaf + // CHECK0-64: -mframe-pointer=all // CHECK1-64-NOT: -mframe-pointer=all // CHECK2-64-NOT: -mframe-pointer=all diff --git a/clang/test/Driver/range-warnings.c b/clang/test/Driver/range-warnings.c new file mode 100644 index 0000000..79650f8 --- /dev/null +++ b/clang/test/Driver/range-warnings.c @@ -0,0 +1,606 @@ +// Test overriding warnings about complex range. +// range.c tests the settings of -complex-range=, and this test covers +// all warnings related to complex range. + +// Clang options related to complex range are as follows: +// -f[no-]fast-math +// -f[no-]cx-limited-range +// -f[no-]cx-fortran-rules +// -fcomplex-arithmetic=[full|improved|promoted|basic] +// -ffp-model=[strict|precise|fast|aggressive] + +// Emit warnings about overriding when options implying different +// complex ranges are specified. However, warnings are not emitted in +// the following cases: +// (a) When the positive/negative form or a different value of the same +// option is specified. +// Example: +// `-ffast-math -fno-fast-math` +// `-fcx-limited-range -fno-cx-limited-range` +// `-fcx-fortran-rules -fno-cx-fortran-rules` +// `-fcomplex-arithmetic=full -fcomplex-arithmetic=improved` +// `-ffp-model=strict -ffp-model=aggressive` +// +// (b) When -ffp-model= is overridden by -f[no-]fast-math. 
+// Example: +// `-ffp-model=fast -fno-fast-math` +// `-ffp-model=strict -ffast-math` + + +// RUN: %clang -### -Werror -ffast-math -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffast-math -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffast-math -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOLIM-OVERRIDING,FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffast-math -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffast-math -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFORT-OVERRIDING,FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffast-math -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-FULL-OVERRIDING,FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffast-math -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-IMPROVED-OVERRIDING,FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffast-math -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-PROMOTED-OVERRIDING,FAST-OVERRIDDEN %s + +// RUN: %clang -### -Werror -ffast-math -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffast-math -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-STRICT-OVERRIDING,FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffast-math -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-PRECISE-OVERRIDING,FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffast-math -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-FAST-OVERRIDING,FAST-OVERRIDDEN %s + +// RUN: %clang -### -Werror -ffast-math -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math 
-ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-fast-math -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcx-limited-range -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcx-limited-range -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFAST-OVERRIDING,LIM-OVERRIDDEN %s + +// RUN: %clang -### -Werror 
-fcx-limited-range -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcx-limited-range -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,LIM-OVERRIDDEN %s + +// RUN: %clang -### -fcx-limited-range -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFORT-OVERRIDING,LIM-OVERRIDDEN %s + +// RUN: %clang -### -fcx-limited-range -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-FULL-OVERRIDING,LIM-OVERRIDDEN %s + +// RUN: %clang -### -fcx-limited-range -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-IMPROVED-OVERRIDING,LIM-OVERRIDDEN %s + +// RUN: %clang -### -fcx-limited-range -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-PROMOTED-OVERRIDING,LIM-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcx-limited-range -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcx-limited-range -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-STRICT-OVERRIDING,LIM-OVERRIDDEN %s + +// RUN: %clang -### -fcx-limited-range -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-PRECISE-OVERRIDING,LIM-OVERRIDDEN %s + +// RUN: %clang -### -fcx-limited-range -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-FAST-OVERRIDING,LIM-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcx-limited-range -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fno-cx-limited-range -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FAST-OVERRIDING,NOLIM-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fno-cx-limited-range -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-cx-limited-range -fcx-limited-range -c %s 2>&1 \ 
+// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fno-cx-limited-range -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,NOLIM-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fno-cx-limited-range -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-cx-limited-range -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fno-cx-limited-range -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-IMPROVED-OVERRIDING,NOLIM-OVERRIDDEN %s + +// RUN: %clang -### -fno-cx-limited-range -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-PROMOTED-OVERRIDING,NOLIM-OVERRIDDEN %s + +// RUN: %clang -### -fno-cx-limited-range -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-BASIC-OVERRIDING,NOLIM-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fno-cx-limited-range -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-cx-limited-range -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fno-cx-limited-range -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-FAST-OVERRIDING,NOLIM-OVERRIDDEN %s + +// RUN: %clang -### -fno-cx-limited-range -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-AGGRESSIVE-OVERRIDING,NOLIM-OVERRIDDEN %s + +// RUN: %clang -### -fcx-fortran-rules -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FAST-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -fcx-fortran-rules -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFAST-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -fcx-fortran-rules -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck 
--check-prefixes=LIM-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -fcx-fortran-rules -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOLIM-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcx-fortran-rules -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcx-fortran-rules -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-FULL-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcx-fortran-rules -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcx-fortran-rules -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-PROMOTED-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -fcx-fortran-rules -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-BASIC-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -fcx-fortran-rules -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-STRICT-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -fcx-fortran-rules -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-PRECISE-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -fcx-fortran-rules -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-FAST-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -fcx-fortran-rules -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-AGGRESSIVE-OVERRIDING,FORT-OVERRIDDEN %s + +// RUN: %clang -### -fno-cx-fortran-rules -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FAST-OVERRIDING,NOFORT-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fno-cx-fortran-rules -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fno-cx-fortran-rules -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck 
--check-prefixes=LIM-OVERRIDING,NOFORT-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fno-cx-fortran-rules -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-cx-fortran-rules -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-cx-fortran-rules -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fno-cx-fortran-rules -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-IMPROVED-OVERRIDING,NOFORT-OVERRIDDEN %s + +// RUN: %clang -### -fno-cx-fortran-rules -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-PROMOTED-OVERRIDING,NOFORT-OVERRIDDEN %s + +// RUN: %clang -### -fno-cx-fortran-rules -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-BASIC-OVERRIDING,NOFORT-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fno-cx-fortran-rules -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fno-cx-fortran-rules -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fno-cx-fortran-rules -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-FAST-OVERRIDING,NOFORT-OVERRIDDEN %s + +// RUN: %clang -### -fno-cx-fortran-rules -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-AGGRESSIVE-OVERRIDING,NOFORT-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=full -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FAST-OVERRIDING,ARITH-FULL-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=full -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=full -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck 
--check-prefixes=LIM-OVERRIDING,ARITH-FULL-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=full -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=full -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,ARITH-FULL-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=full -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=full -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=full -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=full -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=full -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=full -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=full -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-FAST-OVERRIDING,ARITH-FULL-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=full -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-AGGRESSIVE-OVERRIDING,ARITH-FULL-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=improved -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FAST-OVERRIDING,ARITH-IMPROVED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=improved -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFAST-OVERRIDING,ARITH-IMPROVED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=improved -fcx-limited-range -c %s 2>&1 \ +// 
RUN: | FileCheck --check-prefixes=LIM-OVERRIDING,ARITH-IMPROVED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=improved -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOLIM-OVERRIDING,ARITH-IMPROVED-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=improved -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=improved -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFORT-OVERRIDING,ARITH-IMPROVED-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=improved -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=improved -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=improved -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=improved -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-STRICT-OVERRIDING,ARITH-IMPROVED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=improved -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-PRECISE-OVERRIDING,ARITH-IMPROVED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=improved -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-FAST-OVERRIDING,ARITH-IMPROVED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=improved -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-AGGRESSIVE-OVERRIDING,ARITH-IMPROVED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=promoted -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FAST-OVERRIDING,ARITH-PROMOTED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=promoted -fno-fast-math -c %s 2>&1 \ +// RUN: | 
FileCheck --check-prefixes=NOFAST-OVERRIDING,ARITH-PROMOTED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=promoted -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=LIM-OVERRIDING,ARITH-PROMOTED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=promoted -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOLIM-OVERRIDING,ARITH-PROMOTED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=promoted -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,ARITH-PROMOTED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=promoted -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFORT-OVERRIDING,ARITH-PROMOTED-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=promoted -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=promoted -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=promoted -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=promoted -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-STRICT-OVERRIDING,ARITH-PROMOTED-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=promoted -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-PRECISE-OVERRIDING,ARITH-PROMOTED-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=promoted -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=promoted -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-AGGRESSIVE-OVERRIDING,ARITH-PROMOTED-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=basic -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck 
--check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=basic -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFAST-OVERRIDING,ARITH-BASIC-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=basic -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=basic -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOLIM-OVERRIDING,ARITH-BASIC-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=basic -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,ARITH-BASIC-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=basic -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFORT-OVERRIDING,ARITH-BASIC-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=basic -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=basic -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=basic -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -fcomplex-arithmetic=basic -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-STRICT-OVERRIDING,ARITH-BASIC-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=basic -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-PRECISE-OVERRIDING,ARITH-BASIC-OVERRIDDEN %s + +// RUN: %clang -### -fcomplex-arithmetic=basic -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=MODEL-FAST-OVERRIDING,ARITH-BASIC-OVERRIDDEN %s + +// RUN: %clang -### -Werror -fcomplex-arithmetic=basic -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=strict -ffast-math -c %s 
2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=strict -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=strict -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=LIM-OVERRIDING,MODEL-STRICT-OVERRIDDEN %s + +// RUN: %clang -### -Werror -ffp-model=strict -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=strict -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,MODEL-STRICT-OVERRIDDEN %s + +// RUN: %clang -### -Werror -ffp-model=strict -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=strict -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=strict -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-IMPROVED-OVERRIDING,MODEL-STRICT-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=strict -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-PROMOTED-OVERRIDING,MODEL-STRICT-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=strict -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-BASIC-OVERRIDING,MODEL-STRICT-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=strict -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=strict -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=strict -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=precise -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=precise 
-fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=precise -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=LIM-OVERRIDING,MODEL-PRECISE-OVERRIDDEN %s + +// RUN: %clang -### -Werror -ffp-model=precise -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=precise -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,MODEL-PRECISE-OVERRIDDEN %s + +// RUN: %clang -### -Werror -ffp-model=precise -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=precise -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=precise -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-IMPROVED-OVERRIDING,MODEL-PRECISE-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=precise -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-PROMOTED-OVERRIDING,MODEL-PRECISE-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=precise -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-BASIC-OVERRIDING,MODEL-PRECISE-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=precise -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=precise -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=precise -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=fast -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=fast -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### 
-ffp-model=fast -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=LIM-OVERRIDING,MODEL-FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=fast -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOLIM-OVERRIDING,MODEL-FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=fast -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,MODEL-FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=fast -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFORT-OVERRIDING,MODEL-FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=fast -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-FULL-OVERRIDING,MODEL-FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=fast -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-IMPROVED-OVERRIDING,MODEL-FAST-OVERRIDDEN %s + +// RUN: %clang -### -Werror -ffp-model=fast -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=fast -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-BASIC-OVERRIDING,MODEL-FAST-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=fast -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=fast -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=fast -ffp-model=aggressive -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=aggressive -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=aggressive -fno-fast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -Werror -ffp-model=aggressive -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck 
--check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=aggressive -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOLIM-OVERRIDING,MODEL-AGGRESSIVE-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=aggressive -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=FORT-OVERRIDING,MODEL-AGGRESSIVE-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=aggressive -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NOFORT-OVERRIDING,MODEL-AGGRESSIVE-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=aggressive -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-FULL-OVERRIDING,MODEL-AGGRESSIVE-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=aggressive -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-IMPROVED-OVERRIDING,MODEL-AGGRESSIVE-OVERRIDDEN %s + +// RUN: %clang -### -ffp-model=aggressive -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=ARITH-PROMOTED-OVERRIDING,MODEL-AGGRESSIVE-OVERRIDDEN %s + +// RUN: %clang -### -Werror -ffp-model=aggressive -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=aggressive -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=aggressive -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + +// RUN: %clang -### -ffp-model=aggressive -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefixes=NO-OVR-WARN %s + + +// NO-OVR-WARN-NOT: [-Woverriding-complex-range] + +// FAST-OVERRIDING: warning: '-ffast-math' sets complex range to "basic" +// NOFAST-OVERRIDING: warning: '-fno-fast-math' sets complex range to "none" +// LIM-OVERRIDING: warning: '-fcx-limited-range' sets complex range to "basic" +// NOLIM-OVERRIDING: warning: '-fno-cx-limited-range' sets complex range to "full" +// FORT-OVERRIDING: 
warning: '-fcx-fortran-rules' sets complex range to "improved" +// NOFORT-OVERRIDING: warning: '-fno-cx-fortran-rules' sets complex range to "full" +// ARITH-FULL-OVERRIDING: warning: '-fcomplex-arithmetic=full' sets complex range to "full" +// ARITH-IMPROVED-OVERRIDING: warning: '-fcomplex-arithmetic=improved' sets complex range to "improved" +// ARITH-PROMOTED-OVERRIDING: warning: '-fcomplex-arithmetic=promoted' sets complex range to "promoted" +// ARITH-BASIC-OVERRIDING: warning: '-fcomplex-arithmetic=basic' sets complex range to "basic" +// MODEL-STRICT-OVERRIDING: warning: '-ffp-model=strict' sets complex range to "full" +// MODEL-PRECISE-OVERRIDING: warning: '-ffp-model=precise' sets complex range to "full" +// MODEL-FAST-OVERRIDING: warning: '-ffp-model=fast' sets complex range to "promoted" +// MODEL-AGGRESSIVE-OVERRIDING: warning: '-ffp-model=aggressive' sets complex range to "basic" + +// FAST-OVERRIDDEN: overriding the setting of "basic" that was implied by '-ffast-math' [-Woverriding-complex-range] +// LIM-OVERRIDDEN: overriding the setting of "basic" that was implied by '-fcx-limited-range' [-Woverriding-complex-range] +// NOLIM-OVERRIDDEN: overriding the setting of "full" that was implied by '-fno-cx-limited-range' [-Woverriding-complex-range] +// FORT-OVERRIDDEN: overriding the setting of "improved" that was implied by '-fcx-fortran-rules' [-Woverriding-complex-range] +// NOFORT-OVERRIDDEN: overriding the setting of "full" that was implied by '-fno-cx-fortran-rules' [-Woverriding-complex-range] +// ARITH-FULL-OVERRIDDEN: overriding the setting of "full" that was implied by '-fcomplex-arithmetic=full' [-Woverriding-complex-range] +// ARITH-IMPROVED-OVERRIDDEN: overriding the setting of "improved" that was implied by '-fcomplex-arithmetic=improved' [-Woverriding-complex-range] +// ARITH-PROMOTED-OVERRIDDEN: overriding the setting of "promoted" that was implied by '-fcomplex-arithmetic=promoted' [-Woverriding-complex-range] +// ARITH-BASIC-OVERRIDDEN: 
overriding the setting of "basic" that was implied by '-fcomplex-arithmetic=basic' [-Woverriding-complex-range] +// MODEL-STRICT-OVERRIDDEN: overriding the setting of "full" that was implied by '-ffp-model=strict' [-Woverriding-complex-range] +// MODEL-PRECISE-OVERRIDDEN: overriding the setting of "full" that was implied by '-ffp-model=precise' [-Woverriding-complex-range] +// MODEL-FAST-OVERRIDDEN: overriding the setting of "promoted" that was implied by '-ffp-model=fast' [-Woverriding-complex-range] +// MODEL-AGGRESSIVE-OVERRIDDEN: overriding the setting of "basic" that was implied by '-ffp-model=aggressive' [-Woverriding-complex-range] diff --git a/clang/test/Driver/range.c b/clang/test/Driver/range.c index 30140f3..bcad88e 100644 --- a/clang/test/Driver/range.c +++ b/clang/test/Driver/range.c @@ -6,12 +6,6 @@ // RUN: %clang -### -target x86_64 -fno-cx-limited-range -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=FULL %s -// RUN: %clang -### -target x86_64 -fcx-limited-range -fcx-fortran-rules \ -// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN1 %s - -// RUN: %clang -### -target x86_64 -fno-cx-limited-range -fcx-fortran-rules \ -// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN2 %s - // RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-limited-range \ // RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s @@ -24,9 +18,6 @@ // RUN: %clang -### -target x86_64 -fno-cx-fortran-rules -fno-cx-limited-range \ // RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s -// RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-fortran-rules \ -// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN4 %s - // RUN: %clang -### -target x86_64 -fcx-fortran-rules -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=IMPRVD %s @@ -36,25 +27,12 @@ // RUN: %clang -### -target x86_64 -fcx-fortran-rules -c %s 2>&1 \ // RUN: -fno-cx-fortran-rules | FileCheck --check-prefix=FULL %s -// RUN: %clang -### -target x86_64 -fcx-fortran-rules -fno-cx-limited-range \ -// RUN: -c %s 2>&1 | 
FileCheck --check-prefix=WARN3 %s - // RUN: %clang -### -target x86_64 -fno-cx-fortran-rules -c %s 2>&1 \ // RUN: | FileCheck %s -// RUN: %clang -### -target x86_64 -fcx-limited-range -fcx-fortran-rules \ -// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN1 %s - -// RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-fortran-rules \ -// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN4 %s - // RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-limited-range \ // RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s -// RUN: %clang -### -target x86_64 -fcx-fortran-rules \ -// RUN: -fcx-limited-range -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN20 %s - // RUN: %clang -### -target x86_64 -ffast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=BASIC %s @@ -90,14 +68,6 @@ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=IMPRVD %s -// RUN: %clang -### -target x86_64 -fcx-limited-range \ -// RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN6 %s - -// RUN: %clang -### -target x86_64 -fcx-fortran-rules \ -// RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN7 %s - // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ // RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=FULL %s @@ -124,10 +94,6 @@ // RUN: | FileCheck --check-prefix=BASIC %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ -// RUN: -fcx-limited-range -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN14 %s - -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=IMPRVD %s @@ -140,9 +106,6 @@ // RUN: | FileCheck --check-prefix=BASIC %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ -// RUN: -ffast-math -c %s 2>&1 | FileCheck --check-prefix=WARN17 %s - -// RUN: %clang -### -target x86_64 
-fcomplex-arithmetic=full \ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=IMPRVD %s @@ -178,13 +141,13 @@ // RUN: | FileCheck --check-prefix=BASIC %s // RUN: %clang -### --target=x86_64 -fcx-limited-range -fno-fast-math \ -// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE,WARN21 %s +// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE %s // RUN: %clang -### -Werror --target=x86_64 -fno-cx-limited-range -fno-fast-math \ // RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE %s // RUN: %clang -### --target=x86_64 -fcx-fortran-rules -fno-fast-math \ -// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE,WARN22 %s +// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE %s // RUN: %clang -### -Werror --target=x86_64 -fno-cx-fortran-rules -fno-fast-math \ // RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE %s @@ -193,13 +156,13 @@ // RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE %s // RUN: %clang -### --target=x86_64 -fcomplex-arithmetic=basic -fno-fast-math \ -// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE,WARN23 %s +// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE %s // RUN: %clang -### --target=x86_64 -fcomplex-arithmetic=promoted -fno-fast-math \ -// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE,WARN24 %s +// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE %s // RUN: %clang -### --target=x86_64 -fcomplex-arithmetic=improved -fno-fast-math \ -// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE,WARN25 %s +// RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE %s // RUN: %clang -### -Werror --target=x86_64 -fcomplex-arithmetic=full -fno-fast-math \ // RUN: -c %s 2>&1 | FileCheck --check-prefixes=RANGE %s @@ -255,22 +218,6 @@ // RUN: %clang -### -Werror --target=x86_64 -fno-fast-math -ffp-model=strict \ // RUN: -c %s 2>&1 | FileCheck --check-prefixes=FULL %s -// WARN1: warning: overriding '-fcx-limited-range' option with '-fcx-fortran-rules' [-Woverriding-option] -// WARN2: warning: overriding 
'-fno-cx-limited-range' option with '-fcx-fortran-rules' [-Woverriding-option] -// WARN3: warning: overriding '-fcx-fortran-rules' option with '-fno-cx-limited-range' [-Woverriding-option] -// WARN4: warning: overriding '-fcx-limited-range' option with '-fno-cx-fortran-rules' [-Woverriding-option] -// WARN5: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] -// WARN6: warning: overriding '-fcx-limited-range' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] -// WARN7: warning: overriding '-fcx-fortran-rules' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] -// WARN14: overriding '-complex-range=promoted' option with '-fcx-limited-range' [-Woverriding-option] -// WARN17: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] -// WARN20: warning: overriding '-fcx-fortran-rules' option with '-fcx-limited-range' [-Woverriding-option] -// WARN21: warning: overriding '-fcx-limited-range' option with '-fno-fast-math' [-Woverriding-option] -// WARN22: warning: overriding '-fcx-fortran-rules' option with '-fno-fast-math' [-Woverriding-option] -// WARN23: warning: overriding '-fcomplex-arithmetic=basic' option with '-fno-fast-math' [-Woverriding-option] -// WARN24: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fno-fast-math' [-Woverriding-option] -// WARN25: warning: overriding '-fcomplex-arithmetic=improved' option with '-fno-fast-math' [-Woverriding-option] - // BASIC: -complex-range=basic // FULL: -complex-range=full // PRMTD: -complex-range=promoted diff --git a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c index ea0821c..88ec766 100644 --- a/clang/test/Driver/riscv-cpus.c +++ b/clang/test/Driver/riscv-cpus.c @@ -401,12 +401,16 @@ // -march overwrite -mcpu's default -march // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mcpu=sifive-e31 -march=rv32imc | FileCheck 
-check-prefix=MCPU-MARCH %s -// MCPU-MARCH: "-nostdsysteminc" "-target-cpu" "sifive-e31" "-target-feature" "+m" "-target-feature" "+c" +// MCPU-MARCH: "-nostdsysteminc" "-target-cpu" "sifive-e31" +// MCPU-MARCH: "-target-feature" "+m" "-target-feature" "+c" // MCPU-MARCH: "-target-abi" "ilp32" // -march=unset erases previous march // RUN: %clang --target=riscv32 -### -c %s 2>&1 -march=rv32imc -march=unset -mcpu=sifive-e31 | FileCheck -check-prefix=MARCH-UNSET %s -// MARCH-UNSET: "-nostdsysteminc" "-target-cpu" "sifive-e31" "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+c" +// MARCH-UNSET: "-nostdsysteminc" "-target-cpu" "sifive-e31" +// MARCH-UNSET: "-target-feature" "+m" +// MARCH-UNSET: "-target-feature" "+a" +// MARCH-UNSET: "-target-feature" "+c" // MARCH-UNSET-SAME: "-target-abi" "ilp32" // Check interaction between -mcpu and mtune, -mtune won't affect arch related diff --git a/clang/test/Driver/riscv-default-features.c b/clang/test/Driver/riscv-default-features.c index 4c3883c..f127f30 100644 --- a/clang/test/Driver/riscv-default-features.c +++ b/clang/test/Driver/riscv-default-features.c @@ -1,8 +1,8 @@ // RUN: %clang --target=riscv32-unknown-elf -S -emit-llvm %s -o - | FileCheck %s -check-prefix=RV32 // RUN: %clang --target=riscv64-unknown-elf -S -emit-llvm %s -o - | FileCheck %s -check-prefix=RV64 -// RV32: "target-features"="+32bit,+a,+c,+m,+relax, -// RV64: "target-features"="+64bit,+a,+c,+m,+relax, +// RV32: "target-features"="+32bit,+a,+c,+i,+m,+relax, +// RV64: "target-features"="+64bit,+a,+c,+i,+m,+relax, // Dummy function int foo(void){ diff --git a/clang/test/Driver/riscv-features.c b/clang/test/Driver/riscv-features.c index 80dec2c..1c8b52b 100644 --- a/clang/test/Driver/riscv-features.c +++ b/clang/test/Driver/riscv-features.c @@ -1,3 +1,4 @@ + // RUN: %clang --target=riscv32-unknown-elf -### %s -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv64-unknown-elf -### %s -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang 
--target=riscv64-linux-android -### %s -fsyntax-only 2>&1 | FileCheck %s -check-prefixes=ANDROID,DEFAULT,FAST-SCALAR-UNALIGNED-ACCESS,FAST-VECTOR-UNALIGNED-ACCESS @@ -85,3 +86,14 @@ // FUCHSIA-SAME: "-target-feature" "+zbb" // FUCHSIA-SAME: "-target-feature" "+zbs" + +// RUN: %clang --target=riscv32-unknown-elf -### -march=rv32i %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=RVI +// RUN: %clang --target=riscv32-unknown-elf -### -march=rv64i %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=RVI +// RUN: %clang --target=riscv32-unknown-elf -### -march=rv32e %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=RVE +// RUN: %clang --target=riscv32-unknown-elf -### -march=rv64e %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=RVE + +// RVI: "-target-feature" "+i" +// RVI-SAME: "-target-feature" "-e" + +// RVE: "-target-feature" "+e" +// RVE-SAME: "-target-feature" "-i" diff --git a/clang/test/Driver/target-override.c b/clang/test/Driver/target-override.c index 775ca48..5bd88e1 100644 --- a/clang/test/Driver/target-override.c +++ b/clang/test/Driver/target-override.c @@ -1,6 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows -// REQUIRES: x86-registered-target +// REQUIRES: x86-registered-target, symlinks // RUN: rm -rf %t && mkdir %t // RUN: ln -s %clang %t/i386-clang diff --git a/clang/test/Frontend/dependency-gen-symlink.c b/clang/test/Frontend/dependency-gen-symlink.c index 39a976a..b88fb7f 100644 --- a/clang/test/Frontend/dependency-gen-symlink.c +++ b/clang/test/Frontend/dependency-gen-symlink.c @@ -1,5 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // Basic test // RUN: rm -rf %t.dir diff --git a/clang/test/Headers/__cpuidex_conflict.c b/clang/test/Headers/__cpuidex_conflict.c index 67f2a0c..a928aa8 100644 --- a/clang/test/Headers/__cpuidex_conflict.c +++ b/clang/test/Headers/__cpuidex_conflict.c @@ -6,6 +6,9 @@ // Ensure that we do not run into conflicts when offloading. 
// RUN: %clang_cc1 %s -DIS_STATIC=static -ffreestanding -fopenmp -fopenmp-is-target-device -aux-triple x86_64-unknown-linux-gnu // RUN: %clang_cc1 -DIS_STATIC="" -triple nvptx64-nvidia-cuda -aux-triple x86_64-unknown-linux-gnu -aux-target-cpu x86-64 -fcuda-is-device -x cuda %s -o - +// RUN: %clang_cc1 -DIS_STATIC="" -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -aux-target-cpu x86-64 -fcuda-is-device -x cuda %s -o - +// RUN: %clang_cc1 -DIS_STATIC="" -triple spirv64 -aux-triple x86_64-unknown-linux-gnu -aux-target-cpu x86-64 -fcuda-is-device -x cuda %s -o - +// RUN: %clang_cc1 -DIS_STATIC="" -triple spirv64 -aux-triple x86_64-unknown-linux-gnu -aux-target-cpu x86-64 -fsycl-is-device %s -o - typedef __SIZE_TYPE__ size_t; diff --git a/clang/test/Modules/added-visible-decls.cppm b/clang/test/Modules/added-visible-decls.cppm new file mode 100644 index 0000000..2f387db --- /dev/null +++ b/clang/test/Modules/added-visible-decls.cppm @@ -0,0 +1,57 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/b.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface -o %t/c.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/d.cpp -fprebuilt-module-path=%t -fsyntax-only -verify + +//--- a.h +template <typename T> +struct A { + static const T value0; + static const T value1; + + constexpr T get0() { + return value0; + } + + constexpr T get1() { + return value1; + } +}; + +template <typename T> +const T A<T>::value0 = T(43); +template <typename T> +const T A<T>::value1 = T(44); + +//--- a.cppm +module; +#include "a.h" +export module a; +export using ::A; + +//--- b.cppm +export module b; +export import a; + +export constexpr int bar() { + return A<int>().get0(); +} + +//--- c.cppm +export module c; +export import b; + +export constexpr int foo() { + 
return A<int>().get1() + A<int>().get0(); +} + +//--- d.cpp +// expected-no-diagnostics + +import c; + +static_assert(bar() + foo() == 130); + diff --git a/clang/test/Modules/crash-vfs-path-symlink-topheader.m b/clang/test/Modules/crash-vfs-path-symlink-topheader.m index 8c45d77..bab754f 100644 --- a/clang/test/Modules/crash-vfs-path-symlink-topheader.m +++ b/clang/test/Modules/crash-vfs-path-symlink-topheader.m @@ -1,6 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows -// REQUIRES: crash-recovery +// REQUIRES: crash-recovery, symlinks // FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? // XFAIL: target={{.*-windows-gnu}} diff --git a/clang/test/Modules/framework-name.m b/clang/test/Modules/framework-name.m index 3114c7d..3e0c459 100644 --- a/clang/test/Modules/framework-name.m +++ b/clang/test/Modules/framework-name.m @@ -1,5 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // RUN: rm -rf %t.mcp %t // RUN: mkdir -p %t diff --git a/clang/test/Modules/module-symlink.m b/clang/test/Modules/module-symlink.m index 633f247..9de1cf9 100644 --- a/clang/test/Modules/module-symlink.m +++ b/clang/test/Modules/module-symlink.m @@ -1,5 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // RUN: rm -rf %t // RUN: %clang_cc1 -fmodules-cache-path=%t/modules -fmodules -fimplicit-module-maps -I %S/Inputs -emit-pch -o %t.pch %s -verify diff --git a/clang/test/Modules/modules-cache-path-canonicalization-output.c b/clang/test/Modules/modules-cache-path-canonicalization-output.c new file mode 100644 index 0000000..ad71b69 --- /dev/null +++ b/clang/test/Modules/modules-cache-path-canonicalization-output.c @@ -0,0 +1,18 @@ +// This checks that implicitly-built modules produce identical PCM +// files regardless of the spelling of the same module cache path. 
+ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fsyntax-only %t/tu.c \ +// RUN: -fmodules-cache-path=%t/cache -fdisable-module-hash +// RUN: mv %t/cache/M.pcm %t/M.pcm +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fsyntax-only %t/tu.c \ +// RUN: -fmodules-cache-path=%t/./cache -fdisable-module-hash +// RUN: diff %t/./cache/M.pcm %t/M.pcm + +//--- tu.c +#include "M.h" +//--- M.h +//--- module.modulemap +module M { header "M.h" } diff --git a/clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp deleted file mode 100644 index 806a79e..0000000 --- a/clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp +++ /dev/null @@ -1,1095 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ -// Test target codegen - host bc file has to be created first. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_1 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2 - -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_1 -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device 
-fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2 -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fexceptions -fcxx-exceptions -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2 - -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 - -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - -template<typename tx> -tx ftemplate(int n) { - tx a = 0; - short aa = 0; - tx b[10]; - - #pragma omp target parallel map(tofrom: aa) num_threads(1024) - { - aa += 1; - } - #ifdef OMP60 - char str[] = "msg"; - #pragma omp target parallel map(tofrom: aa) num_threads(strict: 1024) severity(warning) message(str) - { - aa += 1; - } - #endif - - #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(n) - { - a += 1; - 
aa += 1; - b[2] += 1; - } - #ifdef OMP60 - const char *str1 = "msg1"; - #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(strict: n) severity(warning) message(str1) - { - a += 1; - aa += 1; - b[2] += 1; - } - #endif - - return a; -} - -int bar(int n){ - int a = 0; - - a += ftemplate<int>(n); - - return a; -} - -#endif -// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// OMP45_1-NEXT: entry: -// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// OMP45_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] -// OMP45_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP45_1: user_code.entry: -// OMP45_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) -// OMP45_1-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) -// OMP45_1-NEXT: call void @__kmpc_target_deinit() -// OMP45_1-NEXT: ret void -// OMP45_1: worker.exit: -// OMP45_1-NEXT: ret void -// -// -// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { -// OMP45_1-NEXT: entry: -// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// OMP45_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// 
OMP45_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP45_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP45_1-NEXT: ret void -// -// -// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// OMP45_1-NEXT: entry: -// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) -// OMP45_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP45_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP45_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP45_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr 
[[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]] -// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// OMP45_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP45_1: user_code.entry: -// OMP45_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 -// OMP45_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// OMP45_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// OMP45_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// OMP45_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// OMP45_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) -// OMP45_1-NEXT: call void @__kmpc_target_deinit() -// OMP45_1-NEXT: ret void -// OMP45_1: worker.exit: -// OMP45_1-NEXT: ret void -// -// -// OMP45_1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP45_1-NEXT: entry: -// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP45_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP45_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align 
[[META9]] -// OMP45_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP45_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP45_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP45_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP45_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP45_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP45_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP45_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP45_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP45_1-NEXT: ret void -// -// -// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// OMP45_2-NEXT: entry: -// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// OMP45_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] -// OMP45_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP45_2: user_code.entry: -// OMP45_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) -// OMP45_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) -// OMP45_2-NEXT: call void @__kmpc_target_deinit() -// OMP45_2-NEXT: ret void -// OMP45_2: worker.exit: -// OMP45_2-NEXT: ret void -// -// -// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { -// OMP45_2-NEXT: entry: -// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], 
align 8 -// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// OMP45_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP45_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP45_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP45_2-NEXT: ret void -// -// -// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// OMP45_2-NEXT: entry: -// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) -// OMP45_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP45_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP45_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP45_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// OMP45_2-NEXT: 
[[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]] -// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// OMP45_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP45_2: user_code.entry: -// OMP45_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 -// OMP45_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// OMP45_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// OMP45_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// OMP45_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// OMP45_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// OMP45_2-NEXT: call void 
@__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) -// OMP45_2-NEXT: call void @__kmpc_target_deinit() -// OMP45_2-NEXT: ret void -// OMP45_2: worker.exit: -// OMP45_2-NEXT: ret void -// -// -// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP45_2-NEXT: entry: -// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP45_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP45_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store ptr [[AA]], ptr 
[[AA_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// OMP45_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP45_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP45_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP45_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP45_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP45_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP45_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP45_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP45_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP45_2-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]] -// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_1: user_code.entry: -// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) -// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) -// OMP60_1-NEXT: call void @__kmpc_target_deinit() -// OMP60_1-NEXT: ret void -// OMP60_1: worker.exit: -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, 
addrspace(5) -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37 -// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// OMP60_1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8, !nonnull [[META9]] -// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8 -// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 -// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_1: user_code.entry: -// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// OMP60_1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// OMP60_1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !nonnull [[META9]] -// OMP60_1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP5]], i64 0, i64 0 -// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP3]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1, i32 1, i32 1, ptr 
[[ARRAYDECAY]]) -// OMP60_1-NEXT: call void @__kmpc_target_deinit() -// OMP60_1-NEXT: ret void -// OMP60_1: worker.exit: -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined -// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) 
[[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) -// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11:![0-9]+]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 
@__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_1: user_code.entry: -// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 -// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) -// OMP60_1-NEXT: call void @__kmpc_target_deinit() -// OMP60_1-NEXT: ret void -// OMP60_1: worker.exit: -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to 
i16 -// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51 -// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR4]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) -// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR2]] to ptr 
-// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2_ASCAST]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_1: user_code.entry: -// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 -// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// OMP60_1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2_ASCAST]], align 8 -// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3, i32 1, i32 1, ptr [[TMP9]]) -// OMP60_1-NEXT: call void @__kmpc_target_deinit() -// OMP60_1-NEXT: ret void -// OMP60_1: worker.exit: -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_omp_outlined -// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP60_1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP60_1-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// 
OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]] -// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_2: user_code.entry: -// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) -// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) -// OMP60_2-NEXT: call void @__kmpc_target_deinit() -// OMP60_2-NEXT: ret void -// OMP60_2: worker.exit: -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) 
#[[ATTR1:[0-9]+]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37 -// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, 
addrspace(5) -// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// OMP60_2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8, !nonnull [[META9]] -// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8 -// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 -// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_2: user_code.entry: -// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// OMP60_2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// OMP60_2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !nonnull [[META9]] -// OMP60_2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP5]], i64 0, i64 0 -// OMP60_2-NEXT: call void 
@__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP3]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1, i32 1, i32 1, ptr [[ARRAYDECAY]]) -// OMP60_2-NEXT: call void @__kmpc_target_deinit() -// OMP60_2-NEXT: ret void -// OMP60_2: worker.exit: -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined -// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP60_2-NEXT: ret void -// -// 
-// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) -// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11:![0-9]+]] -// OMP60_2-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_2: user_code.entry: -// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 -// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) -// OMP60_2-NEXT: call void @__kmpc_target_deinit() -// OMP60_2-NEXT: ret void -// OMP60_2: worker.exit: -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], 
ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// 
OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51 -// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR4]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) -// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// 
OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR2]] to ptr -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2_ASCAST]], align 8 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_kernel_environment to ptr), ptr [[DYN_PTR]]) -// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_2: user_code.entry: -// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 -// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// OMP60_2-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// OMP60_2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2_ASCAST]], align 8 -// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3, i32 1, i32 1, ptr [[TMP9]]) -// OMP60_2-NEXT: call void @__kmpc_target_deinit() -// OMP60_2-NEXT: ret void -// OMP60_2: worker.exit: -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_omp_outlined -// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// 
OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] -// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] -// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP60_2-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: 
[[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit() -// CHECK1-NEXT: ret void -// CHECK1: worker.exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, 
align 8, addrspace(5) -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) -// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment to ptr), ptr [[DYN_PTR]]) -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// 
CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) -// CHECK1-NEXT: call void @__kmpc_target_deinit() -// CHECK1-NEXT: ret void -// CHECK1: worker.exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, 
addrspace(5) -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// 
CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) -// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 
1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit() -// CHECK2-NEXT: ret void -// CHECK2: worker.exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], 
ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) -// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull 
[[META7]], !align [[META9]] -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment to ptr), ptr [[DYN_PTR]]) -// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) -// CHECK2-NEXT: call void @__kmpc_target_deinit() -// CHECK2-NEXT: ret void -// CHECK2: worker.exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// 
CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr -// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 
[[ADD1]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: ret void -// diff --git a/clang/test/OpenMP/for_lst_private_codegen_c.c b/clang/test/OpenMP/for_lst_private_codegen_c.c new file mode 100644 index 0000000..c56c5b6 --- /dev/null +++ b/clang/test/OpenMP/for_lst_private_codegen_c.c @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -verify -x c -triple x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=52 -emit-llvm -o - %s | FileCheck %s +// expected-no-diagnostics + +#define N 100 +int A[N]; + +void condlastprivate() { + int x, y, z, k; + x = y = z = k = 0; + + #pragma omp parallel for lastprivate(conditional: x,y,z) lastprivate(k) + for (k = 0; k < N; k++) { + if ((k > 2) && (k < 6)) { + x = A[k]; + z = A[k] + 111; + } else { + y = A[k] + 222; + } + } +} + +int main() { + for (int i = 0; i < N; i++) + A[i] = i; + condlastprivate(); + return 0; +} + +// CHECK: @.pl_cond.x_[[ID:[0-9]+]].iv = common global i32 0, align 4 +// CHECK: @pl_cond.x_[[ID]] = common global i32 0, align 4 +// CHECK: @.gomp_critical_user_pl_cond.x_[[ID]].var = common global [8 x i32] zeroinitializer, align 8 + +// CHECK: @.pl_cond.z_[[ID]].iv = common global i32 0, align 4 +// CHECK: @pl_cond.z_[[ID]] = common global i32 0, align 4 +// CHECK: @.gomp_critical_user_pl_cond.z_[[ID]].var = common global [8 x i32] zeroinitializer, align 8 + +// CHECK: @.pl_cond.y_[[ID]].iv = common global i32 0, align 4 +// CHECK: @pl_cond.y_[[ID]] = common global i32 0, align 4 +// CHECK: @.gomp_critical_user_pl_cond.y_[[ID]].var = common global [8 x i32] zeroinitializer, align 8 + +// CHECK-LABEL: define internal void @condlastprivate.omp_outlined( +// CHECK: call void @__kmpc_critical(ptr @2, {{.*}}, ptr 
@.gomp_critical_user_pl_cond.x_[[ID]].var) +// CHECK: store i32 %{{[0-9]+}}, ptr @pl_cond.x_[[ID]], align 4 +// CHECK: call void @__kmpc_end_critical(ptr @2, {{.*}}, ptr @.gomp_critical_user_pl_cond.x_[[ID]].var) + +// CHECK: call void @__kmpc_critical(ptr @2, {{.*}}, ptr @.gomp_critical_user_pl_cond.z_[[ID]].var) +// CHECK: store i32 %{{[0-9]+}}, ptr @pl_cond.z_[[ID]], align 4 +// CHECK: call void @__kmpc_end_critical(ptr @2, {{.*}}, ptr @.gomp_critical_user_pl_cond.z_[[ID]].var) + +// CHECK: call void @__kmpc_critical(ptr @2, {{.*}}, ptr @.gomp_critical_user_pl_cond.y_[[ID]].var) +// CHECK: store i32 %{{[0-9]+}}, ptr @pl_cond.y_[[ID]], align 4 +// CHECK: call void @__kmpc_end_critical(ptr @2, {{.*}}, ptr @.gomp_critical_user_pl_cond.y_[[ID]].var) + diff --git a/clang/test/OpenMP/host-ir-file-vfs.c b/clang/test/OpenMP/host-ir-file-vfs.c new file mode 100644 index 0000000..394d8fb --- /dev/null +++ b/clang/test/OpenMP/host-ir-file-vfs.c @@ -0,0 +1,33 @@ +// This test checks that the OpenMP host IR file goes through VFS overlays. 
+ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// RUN: sed -e "s|DIR|%/t|g" %t/vfs.json.in > %t/vfs.json +// RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %t/host.c -o %t/host.bc + +// RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %t/device.c -o - \ +// RUN: -fopenmp-is-target-device -fopenmp-host-ir-file-path %t/virtual/host.bc -ivfsoverlay %t/vfs.json -verify + +//--- vfs.json.in +{ + 'version': 0, + 'use-external-names': true, + 'roots': [ + { + 'name': 'DIR/virtual', + 'type': 'directory', + 'contents': [ + { + 'name': 'host.bc', + 'type': 'file', + 'external-contents': 'DIR/host.bc' + } + ] + } + ] +} + +//--- host.c +//--- device.c +// expected-no-diagnostics diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 3f6c1dc..0045bd4 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -5,11 +5,6 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown 
-fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1-OMP60 -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2-OMP60 -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2-OMP60 // expected-no-diagnostics #ifndef HEADER @@ -28,11 +23,7 @@ struct TT { void targetBar(int *Ptr1, int *Ptr2) { #pragma omp target map(Ptr1[:0], Ptr2) -#ifdef OMP60 -#pragma omp parallel num_threads(strict: 2) severity(warning) message("msg") -#else #pragma omp parallel num_threads(2) -#endif *Ptr1 = *Ptr2; } @@ -153,7 +144,7 @@ void unreachable_call() { } #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] { // 
CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -163,8 +154,8 @@ void unreachable_call() { // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 // CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8, !nonnull [[META12:![0-9]+]], !align [[META13:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -173,14 +164,14 @@ void unreachable_call() { // CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined // CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -191,8 +182,8 @@ void unreachable_call() { // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 // CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 @@ -200,12 +191,12 @@ void unreachable_call() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l48 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l48_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -215,14 +206,14 @@ void unreachable_call() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l56 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[AA:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l56_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -242,7 +233,7 @@ void unreachable_call() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l62 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) 
[[D:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -265,15 +256,15 @@ void unreachable_call() { // CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META12]], !align [[META14:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META12]], !align [[META14]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l62_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -335,7 +326,7 @@ void unreachable_call() { // CHECK1-NEXT: ret ptr [[X]] // // -// 
CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l99 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -348,8 +339,8 @@ void unreachable_call() { // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META12]], !align [[META14]] -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l99_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -376,7 +367,7 @@ void unreachable_call() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l117 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -394,8 +385,8 @@ void unreachable_call() { // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = 
load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META12]], !align [[META15:![0-9]+]] -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l117_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -433,7 +424,7 @@ void unreachable_call() { // CHECK1-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[F1]], ptr [[F]], align 4 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[F]], ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 @@ -444,12 +435,12 @@ void unreachable_call() { // CHECK1-NEXT: ret i32 [[TMP4]] // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l151 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l151_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -462,7 +453,7 @@ void unreachable_call() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l83 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -473,8 +464,8 @@ void unreachable_call() { // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META12]], !align [[META14]] -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l83_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -508,10 +499,10 @@ void unreachable_call() { // CHECK1-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 8, !nonnull [[META12]], !align [[META14]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META12]], !align [[META13]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 @@ -539,7 +530,7 @@ void unreachable_call() { // CHECK1-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -549,8 +540,8 @@ void unreachable_call() { // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 // CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 // CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4, !nonnull [[META12:![0-9]+]], !align [[META13:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[PTR2_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -559,14 +550,14 @@ void unreachable_call() { // CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_omp_outlined +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined // CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -577,8 +568,8 @@ void unreachable_call() { // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 // CHECK2-NEXT: store ptr [[PTR2]], ptr 
[[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 @@ -586,12 +577,12 @@ void unreachable_call() { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l48 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l48_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -601,14 +592,14 @@ void unreachable_call() { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l56 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[AA:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca 
i32, align 4 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l56_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -628,7 +619,7 @@ void unreachable_call() { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l62 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -651,15 +642,15 @@ void unreachable_call() { // CHECK2-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 // CHECK2-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], 
align 4, !nonnull [[META12]], !align [[META14:![0-9]+]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META12]], !align [[META14]] -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META12]], !align [[META14]] -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l62_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -721,7 +712,7 @@ void unreachable_call() { // CHECK2-NEXT: ret ptr [[X]] // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l99 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -734,8 +725,8 @@ void unreachable_call() { // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META12]], 
!align [[META13]] -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l99_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -762,7 +753,7 @@ void unreachable_call() { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l117 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -780,8 +771,8 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META12]], !align [[META15:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l117_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -819,7 +810,7 @@ void unreachable_call() { // CHECK2-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[F1]], ptr [[F]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META12]], !align [[META14]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[F]], ptr [[TMP2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 @@ -830,12 +821,12 @@ void unreachable_call() { // CHECK2-NEXT: ret i32 [[TMP4]] // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l151 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l151_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -848,7 +839,7 @@ void unreachable_call() { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l83 +// CHECK2-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -859,8 +850,8 @@ void unreachable_call() { // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l83_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -894,10 +885,10 @@ void unreachable_call() { // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META12]], !align [[META14]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META12]], !align [[META14]] +// CHECK2-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 // CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK2-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 @@ -924,775 +915,3 @@ void unreachable_call() { // CHECK2-NEXT: call void @_Z3baziRd_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: ret void // -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30 -// CHECK1-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8, !nonnull [[META12:![0-9]+]], !align [[META13:![0-9]+]] -// CHECK1-OMP60-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_kernel_environment, ptr [[DYN_PTR]]) -// CHECK1-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK1-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1-OMP60: user_code.entry: -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// CHECK1-OMP60-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-OMP60-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8 -// 
CHECK1-OMP60-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-OMP60-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK1-OMP60-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2, i32 1, i32 1, ptr @.str) -// CHECK1-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK1-OMP60-NEXT: ret void -// CHECK1-OMP60: worker.exit: -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_omp_outlined -// CHECK1-OMP60-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-OMP60-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// 
CHECK1-OMP60-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l48 -// CHECK1-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4:[0-9]+]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l48_kernel_environment, ptr [[DYN_PTR]]) -// CHECK1-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK1-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1-OMP60: user_code.entry: -// CHECK1-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK1-OMP60-NEXT: ret void -// CHECK1-OMP60: worker.exit: -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l56 -// CHECK1-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[AA:%.*]]) #[[ATTR4]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l56_kernel_environment, ptr [[DYN_PTR]]) -// CHECK1-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK1-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1-OMP60: user_code.entry: -// CHECK1-OMP60-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-OMP60-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-OMP60-NEXT: [[ADD:%.*]] = 
add nsw i32 [[CONV]], 1 -// CHECK1-OMP60-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-OMP60-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-OMP60-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK1-OMP60-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 -// CHECK1-OMP60-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-OMP60-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK1-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK1-OMP60-NEXT: ret void -// CHECK1-OMP60: worker.exit: -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l62 -// CHECK1-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// 
CHECK1-OMP60-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META12]], !align [[META14:![0-9]+]] -// CHECK1-OMP60-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META12]], !align [[META14]] -// CHECK1-OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-OMP60-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-OMP60-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-OMP60-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-OMP60-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-OMP60-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l62_kernel_environment, ptr [[DYN_PTR]]) -// CHECK1-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 -// CHECK1-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1-OMP60: user_code.entry: -// CHECK1-OMP60-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-OMP60-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-OMP60-NEXT: [[TMP10:%.*]] = load float, ptr 
[[ARRAYIDX]], align 4 -// CHECK1-OMP60-NEXT: [[CONV:%.*]] = fpext float [[TMP10]] to double -// CHECK1-OMP60-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 -// CHECK1-OMP60-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK1-OMP60-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-OMP60-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-OMP60-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK1-OMP60-NEXT: [[CONV8:%.*]] = fpext float [[TMP11]] to double -// CHECK1-OMP60-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK1-OMP60-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK1-OMP60-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK1-OMP60-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-OMP60-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 -// CHECK1-OMP60-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK1-OMP60-NEXT: [[ADD13:%.*]] = fadd double [[TMP12]], 1.000000e+00 -// CHECK1-OMP60-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK1-OMP60-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-OMP60-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]] -// CHECK1-OMP60-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 -// CHECK1-OMP60-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK1-OMP60-NEXT: [[ADD16:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK1-OMP60-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK1-OMP60-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-OMP60-NEXT: [[TMP15:%.*]] = load i64, ptr [[X]], align 8 -// CHECK1-OMP60-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP15]], 1 -// CHECK1-OMP60-NEXT: store 
i64 [[ADD17]], ptr [[X]], align 8 -// CHECK1-OMP60-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-OMP60-NEXT: [[TMP16:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK1-OMP60-NEXT: [[CONV18:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK1-OMP60-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK1-OMP60-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK1-OMP60-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 -// CHECK1-OMP60-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) ptr @_ZN2TTIxcEixEi(ptr nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR10:[0-9]+]] -// CHECK1-OMP60-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 -// CHECK1-OMP60-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 -// CHECK1-OMP60-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 -// CHECK1-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK1-OMP60-NEXT: ret void -// CHECK1-OMP60: worker.exit: -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK1-OMP60-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-OMP60-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK1-OMP60-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK1-OMP60-NEXT: ret ptr [[X]] -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l99 -// CHECK1-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { -// CHECK1-OMP60-NEXT: entry: -// 
CHECK1-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META12]], !align [[META14]] -// CHECK1-OMP60-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l99_kernel_environment, ptr [[DYN_PTR]]) -// CHECK1-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK1-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1-OMP60: user_code.entry: -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-OMP60-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-OMP60-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-OMP60-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK1-OMP60-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK1-OMP60-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-OMP60-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-OMP60-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-OMP60-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 -// CHECK1-OMP60-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK1-OMP60-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK1-OMP60-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 
-// CHECK1-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-OMP60-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-OMP60-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-OMP60-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK1-OMP60-NEXT: ret void -// CHECK1-OMP60: worker.exit: -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l117 -// CHECK1-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META12]], !align [[META15:![0-9]+]] -// CHECK1-OMP60-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l117_kernel_environment, ptr [[DYN_PTR]]) -// CHECK1-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 -// CHECK1-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1-OMP60: user_code.entry: -// CHECK1-OMP60-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-OMP60-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double -// CHECK1-OMP60-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-OMP60-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-OMP60-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-OMP60-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-OMP60-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8 -// CHECK1-OMP60-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 -// CHECK1-OMP60-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK1-OMP60-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-OMP60-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP7]] -// CHECK1-OMP60-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-OMP60-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 -// CHECK1-OMP60-NEXT: [[A6:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-OMP60-NEXT: [[TMP8:%.*]] = load double, ptr [[A6]], align 8 -// CHECK1-OMP60-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 -// CHECK1-OMP60-NEXT: [[A8:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-OMP60-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] -// CHECK1-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK1-OMP60-NEXT: ret void -// CHECK1-OMP60: 
worker.exit: -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK1-OMP60-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-OMP60-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-OMP60-NEXT: store i32 [[F1]], ptr [[F]], align 4 -// CHECK1-OMP60-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-OMP60-NEXT: store ptr [[F]], ptr [[TMP2]], align 8 -// CHECK1-OMP60-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-OMP60-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 -// CHECK1-OMP60-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3baziRd_omp_outlined, ptr @_Z3baziRd_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK1-OMP60-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4 -// CHECK1-OMP60-NEXT: call void @__kmpc_free_shared(ptr [[F]], i64 4) -// CHECK1-OMP60-NEXT: ret i32 [[TMP4]] -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l151 -// CHECK1-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l151_kernel_environment, ptr [[DYN_PTR]]) -// CHECK1-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK1-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1-OMP60: user_code.entry: -// CHECK1-OMP60-NEXT: call void @_Z6asserti(i32 0) #[[ATTR11:[0-9]+]] -// CHECK1-OMP60-NEXT: unreachable -// CHECK1-OMP60: worker.exit: -// CHECK1-OMP60-NEXT: ret void -// CHECK1-OMP60: 1: -// CHECK1-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l83 -// CHECK1-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-OMP60-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META12]], !align [[META14]] -// CHECK1-OMP60-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l83_kernel_environment, ptr [[DYN_PTR]]) -// CHECK1-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK1-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1-OMP60: user_code.entry: -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// 
CHECK1-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-OMP60-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-OMP60-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-OMP60-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK1-OMP60-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK1-OMP60-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-OMP60-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-OMP60-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-OMP60-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-OMP60-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK1-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK1-OMP60-NEXT: ret void -// CHECK1-OMP60: worker.exit: -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined -// CHECK1-OMP60-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-OMP60-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 8, !nonnull [[META12]], !align [[META14]] -// CHECK1-OMP60-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-OMP60-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META12]], !align [[META13]] -// CHECK1-OMP60-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 -// CHECK1-OMP60-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] -// CHECK1-OMP60-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 -// CHECK1-OMP60-NEXT: store i32 [[CONV]], ptr [[TMP0]], align 4 -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK1-OMP60-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined_wrapper -// CHECK1-OMP60-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { -// CHECK1-OMP60-NEXT: entry: -// CHECK1-OMP60-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK1-OMP60-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-OMP60-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-OMP60-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -// CHECK1-OMP60-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 -// CHECK1-OMP60-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK1-OMP60-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-OMP60-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-OMP60-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-OMP60-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-OMP60-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK1-OMP60-NEXT: call void @_Z3baziRd_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]] -// CHECK1-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30 -// 
CHECK2-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 -// CHECK2-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4, !nonnull [[META12:![0-9]+]], !align [[META13:![0-9]+]] -// CHECK2-OMP60-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_kernel_environment, ptr [[DYN_PTR]]) -// CHECK2-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK2-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2-OMP60: user_code.entry: -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// CHECK2-OMP60-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-OMP60-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4 -// CHECK2-OMP60-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-OMP60-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK2-OMP60-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2, i32 1, i32 1, ptr @.str) -// CHECK2-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK2-OMP60-NEXT: ret void -// CHECK2-OMP60: 
worker.exit: -// CHECK2-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l30_omp_outlined -// CHECK2-OMP60-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK2-OMP60-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 -// CHECK2-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l48 -// CHECK2-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4:[0-9]+]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l48_kernel_environment, ptr [[DYN_PTR]]) -// CHECK2-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK2-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2-OMP60: user_code.entry: -// CHECK2-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK2-OMP60-NEXT: ret void -// CHECK2-OMP60: worker.exit: -// CHECK2-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l56 -// CHECK2-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[AA:%.*]]) #[[ATTR4]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l56_kernel_environment, ptr [[DYN_PTR]]) -// CHECK2-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK2-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2-OMP60: user_code.entry: -// CHECK2-OMP60-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-OMP60-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK2-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK2-OMP60-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-OMP60-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-OMP60-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK2-OMP60-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 -// CHECK2-OMP60-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-OMP60-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// 
CHECK2-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK2-OMP60-NEXT: ret void -// CHECK2-OMP60: worker.exit: -// CHECK2-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l62 -// CHECK2-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-OMP60-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META12]], !align [[META14:![0-9]+]] -// CHECK2-OMP60-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK2-OMP60-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK2-OMP60-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META12]], !align [[META14]] -// CHECK2-OMP60-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META12]], !align [[META14]] -// CHECK2-OMP60-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l62_kernel_environment, ptr [[DYN_PTR]]) -// CHECK2-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 -// CHECK2-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2-OMP60: user_code.entry: -// CHECK2-OMP60-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-OMP60-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK2-OMP60-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK2-OMP60-NEXT: [[CONV:%.*]] = fpext float [[TMP10]] to double -// CHECK2-OMP60-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 -// CHECK2-OMP60-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK2-OMP60-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK2-OMP60-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK2-OMP60-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK2-OMP60-NEXT: 
[[CONV8:%.*]] = fpext float [[TMP11]] to double -// CHECK2-OMP60-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK2-OMP60-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK2-OMP60-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK2-OMP60-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-OMP60-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 -// CHECK2-OMP60-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK2-OMP60-NEXT: [[ADD13:%.*]] = fadd double [[TMP12]], 1.000000e+00 -// CHECK2-OMP60-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK2-OMP60-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK2-OMP60-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP13]] -// CHECK2-OMP60-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 -// CHECK2-OMP60-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK2-OMP60-NEXT: [[ADD16:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK2-OMP60-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK2-OMP60-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK2-OMP60-NEXT: [[TMP15:%.*]] = load i64, ptr [[X]], align 8 -// CHECK2-OMP60-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP15]], 1 -// CHECK2-OMP60-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 -// CHECK2-OMP60-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK2-OMP60-NEXT: [[TMP16:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK2-OMP60-NEXT: [[CONV18:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK2-OMP60-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK2-OMP60-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK2-OMP60-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 -// CHECK2-OMP60-NEXT: [[CALL:%.*]] = 
call nonnull align 8 dereferenceable(8) ptr @_ZN2TTIxcEixEi(ptr nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR10:[0-9]+]] -// CHECK2-OMP60-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 -// CHECK2-OMP60-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 -// CHECK2-OMP60-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 -// CHECK2-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK2-OMP60-NEXT: ret void -// CHECK2-OMP60: worker.exit: -// CHECK2-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK2-OMP60-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK2-OMP60-NEXT: ret ptr [[X]] -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l99 -// CHECK2-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], 
align 4 -// CHECK2-OMP60-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-OMP60-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l99_kernel_environment, ptr [[DYN_PTR]]) -// CHECK2-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK2-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2-OMP60: user_code.entry: -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK2-OMP60-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-OMP60-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK2-OMP60-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK2-OMP60-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK2-OMP60-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK2-OMP60-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK2-OMP60-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 -// CHECK2-OMP60-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK2-OMP60-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK2-OMP60-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK2-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK2-OMP60-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-OMP60-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-OMP60-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 -// CHECK2-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK2-OMP60-NEXT: ret void -// CHECK2-OMP60: worker.exit: -// CHECK2-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l117 -// CHECK2-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK2-OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META12]], !align [[META15:![0-9]+]] -// CHECK2-OMP60-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l117_kernel_environment, ptr [[DYN_PTR]]) -// CHECK2-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 -// CHECK2-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2-OMP60: user_code.entry: -// CHECK2-OMP60-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double -// CHECK2-OMP60-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 
1.500000e+00 -// CHECK2-OMP60-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-OMP60-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK2-OMP60-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-OMP60-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8 -// CHECK2-OMP60-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 -// CHECK2-OMP60-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK2-OMP60-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK2-OMP60-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK2-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP7]] -// CHECK2-OMP60-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK2-OMP60-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 -// CHECK2-OMP60-NEXT: [[A6:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-OMP60-NEXT: [[TMP8:%.*]] = load double, ptr [[A6]], align 8 -// CHECK2-OMP60-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 -// CHECK2-OMP60-NEXT: [[A8:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-OMP60-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] -// CHECK2-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK2-OMP60-NEXT: ret void -// CHECK2-OMP60: worker.exit: -// CHECK2-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK2-OMP60-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-OMP60-NEXT: [[F:%.*]] = call align 8 ptr 
@__kmpc_alloc_shared(i32 4) -// CHECK2-OMP60-NEXT: store i32 [[F1]], ptr [[F]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META12]], !align [[META14]] -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-OMP60-NEXT: store ptr [[F]], ptr [[TMP2]], align 4 -// CHECK2-OMP60-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-OMP60-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 -// CHECK2-OMP60-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3baziRd_omp_outlined, ptr @_Z3baziRd_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK2-OMP60-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4 -// CHECK2-OMP60-NEXT: call void @__kmpc_free_shared(ptr [[F]], i32 4) -// CHECK2-OMP60-NEXT: ret i32 [[TMP4]] -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l151 -// CHECK2-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l151_kernel_environment, ptr [[DYN_PTR]]) -// CHECK2-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK2-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2-OMP60: user_code.entry: -// CHECK2-OMP60-NEXT: call void @_Z6asserti(i32 0) #[[ATTR11:[0-9]+]] -// CHECK2-OMP60-NEXT: unreachable -// CHECK2-OMP60: worker.exit: -// CHECK2-OMP60-NEXT: ret void -// CHECK2-OMP60: 1: -// CHECK2-OMP60-NEXT: call void @__kmpc_target_deinit() 
-// CHECK2-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l83 -// CHECK2-OMP60-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-OMP60-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l83_kernel_environment, ptr [[DYN_PTR]]) -// CHECK2-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK2-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK2-OMP60: user_code.entry: -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK2-OMP60-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-OMP60-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK2-OMP60-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK2-OMP60-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK2-OMP60-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK2-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK2-OMP60-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-OMP60-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK2-OMP60-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK2-OMP60-NEXT: call void @__kmpc_target_deinit() -// CHECK2-OMP60-NEXT: ret void -// CHECK2-OMP60: worker.exit: -// CHECK2-OMP60-NEXT: ret void -// -// -// CHECK2-OMP60-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined -// CHECK2-OMP60-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 4 -// CHECK2-OMP60-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 4, !nonnull [[META12]], !align [[META13]] -// CHECK2-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META12]], !align [[META14]] -// CHECK2-OMP60-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META12]], !align [[META14]] -// CHECK2-OMP60-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 -// CHECK2-OMP60-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] -// CHECK2-OMP60-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 -// CHECK2-OMP60-NEXT: store i32 [[CONV]], ptr [[TMP0]], align 4 -// CHECK2-OMP60-NEXT: ret void -// -// -// 
CHECK2-OMP60-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined_wrapper -// CHECK2-OMP60-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { -// CHECK2-OMP60-NEXT: entry: -// CHECK2-OMP60-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK2-OMP60-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-OMP60-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4 -// CHECK2-OMP60-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 -// CHECK2-OMP60-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK2-OMP60-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-OMP60-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-OMP60-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-OMP60-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1 -// CHECK2-OMP60-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK2-OMP60-NEXT: call void @_Z3baziRd_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]] -// CHECK2-OMP60-NEXT: ret void -// diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index dc9a2b7..f92ce4e 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -1,16 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. 
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_1 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2 - -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device 
-fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_1 -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2 -// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 
@@ -32,13 +26,6 @@ tx ftemplate(int n) { { aa += 1; } - #ifdef OMP60 - char str[] = "msg"; - #pragma omp target parallel map(tofrom: aa) num_threads(strict: 1024) severity(warning) message(str) - { - aa += 1; - } - #endif #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(n) { @@ -46,15 +33,6 @@ tx ftemplate(int n) { aa += 1; b[2] += 1; } - #ifdef OMP60 - const char *str1 = "msg1"; - #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(strict: n) severity(warning) message(str1) - { - a += 1; - aa += 1; - b[2] += 1; - } - #endif return a; } @@ -68,675 +46,7 @@ int bar(int n){ } #endif -// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// OMP45_1-NEXT: entry: -// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] -// OMP45_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) -// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP45_1: user_code.entry: -// OMP45_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// OMP45_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// OMP45_1-NEXT: call 
void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// OMP45_1-NEXT: call void @__kmpc_target_deinit() -// OMP45_1-NEXT: ret void -// OMP45_1: worker.exit: -// OMP45_1-NEXT: ret void -// -// -// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { -// OMP45_1-NEXT: entry: -// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] -// OMP45_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP45_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP45_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP45_1-NEXT: ret void -// -// -// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// OMP45_1-NEXT: entry: -// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, 
align 8 -// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 -// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// OMP45_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]] -// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] -// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] -// OMP45_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]]) -// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP45_1: user_code.entry: -// OMP45_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP45_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// OMP45_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// OMP45_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// OMP45_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// OMP45_1-NEXT: store ptr [[TMP2]], 
ptr [[TMP8]], align 8 -// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// OMP45_1-NEXT: call void @__kmpc_target_deinit() -// OMP45_1-NEXT: ret void -// OMP45_1: worker.exit: -// OMP45_1-NEXT: ret void -// -// -// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP45_1-NEXT: entry: -// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] -// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] -// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] -// OMP45_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP45_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP45_1-NEXT: 
[[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP45_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP45_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP45_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP45_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP45_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP45_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP45_1-NEXT: ret void -// -// -// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// OMP45_2-NEXT: entry: -// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 -// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] -// OMP45_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) -// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP45_2: user_code.entry: -// OMP45_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// OMP45_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// OMP45_2-NEXT: call void @__kmpc_target_deinit() -// OMP45_2-NEXT: ret void -// OMP45_2: worker.exit: -// OMP45_2-NEXT: ret void -// -// -// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { -// OMP45_2-NEXT: entry: -// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] -// OMP45_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP45_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP45_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP45_2-NEXT: ret void -// -// -// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// OMP45_2-NEXT: entry: -// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 -// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// OMP45_2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8:![0-9]+]] -// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] -// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] -// OMP45_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]]) -// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP45_2: user_code.entry: -// OMP45_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP45_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// OMP45_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// OMP45_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 -// OMP45_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// OMP45_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 -// 
OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// OMP45_2-NEXT: call void @__kmpc_target_deinit() -// OMP45_2-NEXT: ret void -// OMP45_2: worker.exit: -// OMP45_2-NEXT: ret void -// -// -// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP45_2-NEXT: entry: -// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] -// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] -// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] -// OMP45_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP45_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP45_2-NEXT: [[TMP4:%.*]] = load i16, ptr 
[[TMP1]], align 2 -// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP45_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP45_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP45_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP45_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 -// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP45_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP45_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP45_2-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]] -// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) -// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_1: user_code.entry: -// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 
1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// OMP60_1-NEXT: call void @__kmpc_target_deinit() -// OMP60_1-NEXT: ret void -// OMP60_1: worker.exit: -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37 -// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: 
[[TMP:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 8, !nonnull [[META8]] -// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment, ptr [[DYN_PTR]]) -// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 -// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_1: user_code.entry: -// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// OMP60_1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// OMP60_1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META8]] -// OMP60_1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP5]], i64 0, i64 0 -// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1, i32 1, i32 1, ptr [[ARRAYDECAY]]) -// OMP60_1-NEXT: call void @__kmpc_target_deinit() -// OMP60_1-NEXT: ret void -// OMP60_1: worker.exit: -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined -// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 -// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[B]], 
ptr [[B_ADDR]], align 8 -// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10:![0-9]+]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] -// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] -// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]]) -// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_1: user_code.entry: -// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// OMP60_1-NEXT: call void @__kmpc_target_deinit() -// OMP60_1-NEXT: ret void -// OMP60_1: worker.exit: -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// OMP60_1-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] -// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] -// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP60_1-NEXT: store i32 [[ADD3]], 
ptr [[ARRAYIDX]], align 4 -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51 -// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR4]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 -// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] -// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] -// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_kernel_environment, ptr [[DYN_PTR]]) -// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_1: user_code.entry: -// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// OMP60_1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3, i32 1, i32 1, ptr [[TMP9]]) -// OMP60_1-NEXT: call void @__kmpc_target_deinit() -// OMP60_1-NEXT: ret void -// OMP60_1: worker.exit: -// OMP60_1-NEXT: ret void -// -// -// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_omp_outlined -// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP60_1-NEXT: entry: -// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// OMP60_1-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] -// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] -// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] -// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP60_1-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 -// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// OMP60_2-NEXT: store ptr 
[[AA]], ptr [[AA_ADDR]], align 4 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]] -// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) -// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_2: user_code.entry: -// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// OMP60_2-NEXT: call void @__kmpc_target_deinit() -// OMP60_2-NEXT: ret void -// OMP60_2: worker.exit: -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined -// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align 
[[META9]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37 -// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 -// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !nonnull [[META8]] -// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment, ptr [[DYN_PTR]]) -// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 -// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_2: user_code.entry: -// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// OMP60_2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// OMP60_2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META8]] -// OMP60_2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP5]], i32 0, i32 0 -// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1, i32 1, i32 1, ptr [[ARRAYDECAY]]) -// OMP60_2-NEXT: call void @__kmpc_target_deinit() -// OMP60_2-NEXT: ret void -// OMP60_2: worker.exit: -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined -// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// OMP60_2-SAME: (ptr noalias 
noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 -// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// OMP60_2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10:![0-9]+]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] -// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] -// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]]) -// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_2: user_code.entry: -// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// OMP60_2-NEXT: store ptr [[TMP0]], 
ptr [[TMP6]], align 4 -// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 -// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 -// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// OMP60_2-NEXT: call void @__kmpc_target_deinit() -// OMP60_2-NEXT: ret void -// OMP60_2: worker.exit: -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined -// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, 
!nonnull [[META8]], !align [[META9]] -// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] -// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 -// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51 -// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR4]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 -// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// OMP60_2-NEXT: store ptr 
[[AA]], ptr [[AA_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// OMP60_2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] -// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] -// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_kernel_environment, ptr [[DYN_PTR]]) -// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 -// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// OMP60_2: user_code.entry: -// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 -// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 -// OMP60_2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3, i32 1, i32 1, ptr [[TMP9]]) -// OMP60_2-NEXT: call 
void @__kmpc_target_deinit() -// OMP60_2-NEXT: ret void -// OMP60_2: worker.exit: -// OMP60_2-NEXT: ret void -// -// -// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l51_omp_outlined -// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// OMP60_2-NEXT: entry: -// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] -// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] -// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] -// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 -// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 
-// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 -// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 -// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// OMP60_2-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -744,22 +54,22 @@ int bar(int n){ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -768,7 +78,7 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 @@ -777,7 +87,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 
dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -791,10 +101,10 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -806,14 +116,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -826,9 +136,9 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 @@ 
-844,7 +154,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -852,22 +162,22 @@ int bar(int n){ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, 
i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -876,7 +186,7 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 @@ -885,7 +195,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca 
ptr, align 4 @@ -899,10 +209,10 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -914,14 +224,14 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr 
[[CAPTURED_VARS_ADDRS]], i32 3) // CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -934,9 +244,9 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 diff --git a/clang/test/OpenMP/spirv_kernel_addrspace.cpp b/clang/test/OpenMP/spirv_kernel_addrspace.cpp new file mode 100644 index 0000000..cea7e99 --- /dev/null +++ b/clang/test/OpenMP/spirv_kernel_addrspace.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=spirv64-intel 
-emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-host.bc -DTEAMS +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -DTEAMS -o - | FileCheck %s +// expected-no-diagnostics + +// CHECK: define weak_odr protected spir_kernel void @__omp_offloading_{{.*}}(ptr addrspace(1) noalias noundef %{{.*}}, ptr addrspace(1) noundef align 4 dereferenceable(128) %{{.*}}) + +int main() { + int x[32] = {0}; + +#ifdef TEAMS +#pragma omp target teams +#else +#pragma omp target +#endif + for(int i = 0; i < 32; i++) { + if(i > 0) + x[i] = x[i-1] + i; + } + +return x[31]; +} + diff --git a/clang/test/OpenMP/spirv_variant_match.cpp b/clang/test/OpenMP/spirv_variant_match.cpp index b37858bc..7dcee7e 100644 --- a/clang/test/OpenMP/spirv_variant_match.cpp +++ b/clang/test/OpenMP/spirv_variant_match.cpp @@ -35,7 +35,7 @@ int foo() { return 1; } // CHECK-DAG: define{{.*}} @{{"_Z[0-9]+foo\$ompvariant\$.*"}}() -// CHECK-DAG: call spir_func noundef i32 @{{"_Z[0-9]+foo\$ompvariant\$.*"}}() +// CHECK-DAG: call spir_func noundef addrspace(9) i32 @{{"_Z[0-9]+foo\$ompvariant\$.*"}}() int main() { int res; diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp index 6a04571..b9ed9bc 100644 --- a/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp @@ -10,15 +10,6 @@ // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm 
%s -o - | FileCheck %s --check-prefix=IR-PCH -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes=IR-GPU-OMP60 - -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefixes=IR-OMP60 - -// Check same results after serialization round-trip -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s -// RUN: %clang_cc1 -DOMP60 -fopenmp-version=60 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefixes=IR-PCH-OMP60 - // expected-no-diagnostics #ifndef HEADER @@ -43,22 +34,14 @@ int main() { int x = 0; int device_result[N] = {0}; - #ifdef OMP60 - #pragma omp target parallel loop num_threads(strict: N) severity(warning) message("msg") uses_allocators(omp_pteam_mem_alloc) allocate(omp_pteam_mem_alloc: x) private(x) map(from: device_result) - for (int i = 0; i < N; i++) { - x = omp_get_thread_num(); - device_result[i] = i + x; - } - #else #pragma omp target parallel loop num_threads(N) uses_allocators(omp_pteam_mem_alloc) allocate(omp_pteam_mem_alloc: x) private(x) map(from: device_result) for (int i = 0; i < N; i++) { x = omp_get_thread_num(); device_result[i] = i + x; } - #endif } #endif -// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53 +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 // IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef 
[[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR0:[0-9]+]] { // IR-GPU-NEXT: entry: // IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -72,8 +55,8 @@ int main() { // IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 -// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] -// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53_kernel_environment to ptr), ptr [[DYN_PTR]]) +// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment to ptr), ptr [[DYN_PTR]]) // IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // IR-GPU: user_code.entry: @@ -83,14 +66,14 @@ int main() { // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 // IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 // IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP5]], align 8 -// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 64, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2) +// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 64, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined, ptr null, ptr 
[[CAPTURED_VARS_ADDRS_ASCAST]], i64 2) // IR-GPU-NEXT: call void @__kmpc_target_deinit() // IR-GPU-NEXT: ret void // IR-GPU: worker.exit: // IR-GPU-NEXT: ret void // // -// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53_omp_outlined +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR1:[0-9]+]] { // IR-GPU-NEXT: entry: // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -119,7 +102,7 @@ int main() { // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 // IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 -// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]] +// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 // IR-GPU-NEXT: store i32 63, ptr [[DOTOMP_UB_ASCAST]], align 4 // IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 @@ -200,11 +183,11 @@ int main() { // IR-NEXT: store i32 0, ptr [[X]], align 4 // IR-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false) // IR-NEXT: [[TMP0:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 -// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] +// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] // IR-NEXT: ret i32 0 // // -// IR-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53 +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 // IR-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2:[0-9]+]] { // IR-NEXT: entry: // IR-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 @@ -212,14 +195,14 @@ int main() { // IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // IR-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 // IR-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]] +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8 // IR-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64) // IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53.omp_outlined, ptr [[TMP1]], ptr [[TMP2]]) +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, ptr [[TMP1]], ptr [[TMP2]]) // IR-NEXT: ret void // // -// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53.omp_outlined +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] { // IR-NEXT: entry: // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -237,7 +220,7 @@ int main() { // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // IR-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 // IR-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8 // IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // IR-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4 // IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -307,11 +290,11 @@ int main() { // IR-PCH-NEXT: store i32 0, ptr [[X]], align 4 // IR-PCH-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false) // IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 -// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] // IR-PCH-NEXT: ret i32 0 // // -// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53 +// IR-PCH-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 // IR-PCH-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2:[0-9]+]] { // IR-PCH-NEXT: entry: // IR-PCH-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 @@ -319,14 +302,14 @@ int main() { // IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // IR-PCH-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 // IR-PCH-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]] +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8 // IR-PCH-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64) // IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53.omp_outlined, ptr [[TMP1]], ptr [[TMP2]]) +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, ptr [[TMP1]], ptr [[TMP2]]) // IR-PCH-NEXT: ret void // // -// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l53.omp_outlined +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined // IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] { // IR-PCH-NEXT: entry: // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -344,7 +327,7 @@ int main() { // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // IR-PCH-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 // IR-PCH-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] +// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // IR-PCH-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4 // IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -405,386 +388,3 @@ int main() { // IR-PCH-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr [[TMP14]]) // IR-PCH-NEXT: ret void // -// -// IR-GPU-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47 -// IR-GPU-OMP60-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { -// IR-GPU-OMP60-NEXT: entry: -// IR-GPU-OMP60-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: 
[[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr -// IR-GPU-OMP60-NEXT: [[DEVICE_RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEVICE_RESULT_ADDR]] to ptr -// IR-GPU-OMP60-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_PTEAM_MEM_ALLOC_ADDR]] to ptr -// IR-GPU-OMP60-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr -// IR-GPU-OMP60-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// IR-GPU-OMP60-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// IR-GPU-OMP60-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] -// IR-GPU-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8, !nonnull [[META6]] -// IR-GPU-OMP60-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47_kernel_environment to 
ptr), ptr [[DYN_PTR]]) -// IR-GPU-OMP60-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 -// IR-GPU-OMP60-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// IR-GPU-OMP60: user_code.entry: -// IR-GPU-OMP60-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) -// IR-GPU-OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// IR-GPU-OMP60-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// IR-GPU-OMP60-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// IR-GPU-OMP60-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 -// IR-GPU-OMP60-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !nonnull [[META6]] -// IR-GPU-OMP60-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP7]], i64 0, i64 0 -// IR-GPU-OMP60-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP3]], i32 1, i32 64, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2, i32 1, i32 1, ptr [[ARRAYDECAY]]) -// IR-GPU-OMP60-NEXT: call void @__kmpc_target_deinit() -// IR-GPU-OMP60-NEXT: ret void -// IR-GPU-OMP60: worker.exit: -// IR-GPU-OMP60-NEXT: ret void -// -// -// IR-GPU-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47_omp_outlined -// IR-GPU-OMP60-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR1:[0-9]+]] { -// IR-GPU-OMP60-NEXT: entry: -// IR-GPU-OMP60-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-OMP60-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) -// IR-GPU-OMP60-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) -// IR-GPU-OMP60-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) -// IR-GPU-OMP60-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) -// IR-GPU-OMP60-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) -// IR-GPU-OMP60-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) -// IR-GPU-OMP60-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) -// IR-GPU-OMP60-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// IR-GPU-OMP60-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// IR-GPU-OMP60-NEXT: [[DEVICE_RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEVICE_RESULT_ADDR]] to ptr -// IR-GPU-OMP60-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_PTEAM_MEM_ALLOC_ADDR]] to ptr -// IR-GPU-OMP60-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr -// IR-GPU-OMP60-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// IR-GPU-OMP60-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr -// IR-GPU-OMP60-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr -// IR-GPU-OMP60-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr -// IR-GPU-OMP60-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr -// IR-GPU-OMP60-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr -// IR-GPU-OMP60-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]] -// IR-GPU-OMP60-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: store i32 63, ptr [[DOTOMP_UB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// IR-GPU-OMP60-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// IR-GPU-OMP60-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP2]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) -// IR-GPU-OMP60-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// IR-GPU-OMP60: omp.dispatch.cond: -// IR-GPU-OMP60-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 63 -// IR-GPU-OMP60-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR-GPU-OMP60: cond.true: -// IR-GPU-OMP60-NEXT: br label [[COND_END:%.*]] -// IR-GPU-OMP60: cond.false: -// IR-GPU-OMP60-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: br label [[COND_END]] -// IR-GPU-OMP60: cond.end: -// IR-GPU-OMP60-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// IR-GPU-OMP60-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4 -// 
IR-GPU-OMP60-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// IR-GPU-OMP60-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// IR-GPU-OMP60: omp.dispatch.body: -// IR-GPU-OMP60-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR-GPU-OMP60: omp.inner.for.cond: -// IR-GPU-OMP60-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// IR-GPU-OMP60-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// IR-GPU-OMP60: omp.inner.for.body: -// IR-GPU-OMP60-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 -// IR-GPU-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// IR-GPU-OMP60-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv() #[[ATTR5:[0-9]+]] -// IR-GPU-OMP60-NEXT: store i32 [[CALL]], ptr addrspacecast (ptr addrspace(3) @x to ptr), align 4 -// IR-GPU-OMP60-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[TMP12:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @x to ptr), align 4 -// IR-GPU-OMP60-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// IR-GPU-OMP60-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// IR-GPU-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// 
IR-GPU-OMP60-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// IR-GPU-OMP60-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// IR-GPU-OMP60: omp.body.continue: -// IR-GPU-OMP60-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// IR-GPU-OMP60: omp.inner.for.inc: -// IR-GPU-OMP60-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 -// IR-GPU-OMP60-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: br label [[OMP_INNER_FOR_COND]] -// IR-GPU-OMP60: omp.inner.for.end: -// IR-GPU-OMP60-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// IR-GPU-OMP60: omp.dispatch.inc: -// IR-GPU-OMP60-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// IR-GPU-OMP60-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// IR-GPU-OMP60-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4 -// IR-GPU-OMP60-NEXT: br label [[OMP_DISPATCH_COND]] -// IR-GPU-OMP60: omp.dispatch.end: -// IR-GPU-OMP60-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP2]]) -// IR-GPU-OMP60-NEXT: ret void -// -// -// IR-OMP60-LABEL: define {{[^@]+}}@main -// IR-OMP60-SAME: () #[[ATTR0:[0-9]+]] { -// IR-OMP60-NEXT: entry: -// IR-OMP60-NEXT: [[X:%.*]] = alloca i32, align 4 -// IR-OMP60-NEXT: [[DEVICE_RESULT:%.*]] = alloca [64 x i32], align 16 -// IR-OMP60-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: store i32 0, ptr [[X]], align 4 -// IR-OMP60-NEXT: call void 
@llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false) -// IR-OMP60-NEXT: store ptr @.str, ptr [[DOTCAPTURE_EXPR_]], align 8 -// IR-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8, !nonnull [[META3:![0-9]+]] -// IR-OMP60-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// IR-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 -// IR-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META3]] -// IR-OMP60-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47(ptr [[DEVICE_RESULT]], ptr [[TMP1]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] -// IR-OMP60-NEXT: ret i32 0 -// -// -// IR-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47 -// IR-OMP60-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2:[0-9]+]] { -// IR-OMP60-NEXT: entry: -// IR-OMP60-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// IR-OMP60-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 -// IR-OMP60-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-OMP60-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// IR-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8, !nonnull [[META3]], !align [[META4:![0-9]+]] -// IR-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 8, !nonnull [[META3]] -// IR-OMP60-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// IR-OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull 
[[META3]] -// IR-OMP60-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP3]], i64 0, i64 0 -// IR-OMP60-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB2]], i32 [[TMP0]], i32 64, i32 1, ptr [[ARRAYDECAY]]) -// IR-OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-OMP60-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47.omp_outlined, ptr [[TMP1]], ptr [[TMP4]]) -// IR-OMP60-NEXT: ret void -// -// -// IR-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47.omp_outlined -// IR-OMP60-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] { -// IR-OMP60-NEXT: entry: -// IR-OMP60-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8 -// IR-OMP60-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// IR-OMP60-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// IR-OMP60-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// IR-OMP60-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// IR-OMP60-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// IR-OMP60-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// IR-OMP60-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-OMP60-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-OMP60-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// IR-OMP60-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 -// IR-OMP60-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DEVICE_RESULT_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] -// IR-OMP60-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// IR-OMP60-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4 -// IR-OMP60-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// IR-OMP60-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-OMP60-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// IR-OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 -// IR-OMP60-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr [[TMP3]]) -// IR-OMP60-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-OMP60-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-OMP60-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 63 -// IR-OMP60-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR-OMP60: cond.true: -// IR-OMP60-NEXT: br label [[COND_END:%.*]] -// IR-OMP60: cond.false: -// IR-OMP60-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-OMP60-NEXT: br label [[COND_END]] -// IR-OMP60: cond.end: -// IR-OMP60-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// IR-OMP60-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// IR-OMP60-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// IR-OMP60-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// IR-OMP60-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR-OMP60: omp.inner.for.cond: -// IR-OMP60-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-OMP60-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-OMP60-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// IR-OMP60-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// 
IR-OMP60: omp.inner.for.cond.cleanup: -// IR-OMP60-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// IR-OMP60: omp.inner.for.body: -// IR-OMP60-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-OMP60-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// IR-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// IR-OMP60-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// IR-OMP60-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv() -// IR-OMP60-NEXT: store i32 [[CALL]], ptr [[DOTX__VOID_ADDR]], align 4 -// IR-OMP60-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// IR-OMP60-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4 -// IR-OMP60-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// IR-OMP60-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// IR-OMP60-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// IR-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// IR-OMP60-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 -// IR-OMP60-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// IR-OMP60: omp.body.continue: -// IR-OMP60-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// IR-OMP60: omp.inner.for.inc: -// IR-OMP60-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-OMP60-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// IR-OMP60-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 -// IR-OMP60-NEXT: br label [[OMP_INNER_FOR_COND]] -// IR-OMP60: omp.inner.for.end: -// IR-OMP60-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// IR-OMP60: omp.loop.exit: -// IR-OMP60-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// IR-OMP60-NEXT: [[TMP14:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 -// IR-OMP60-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr [[TMP14]]) -// IR-OMP60-NEXT: ret void -// -// -// IR-PCH-OMP60-LABEL: define {{[^@]+}}@main -// IR-PCH-OMP60-SAME: () #[[ATTR0:[0-9]+]] { -// IR-PCH-OMP60-NEXT: entry: -// 
IR-PCH-OMP60-NEXT: [[X:%.*]] = alloca i32, align 4 -// IR-PCH-OMP60-NEXT: [[DEVICE_RESULT:%.*]] = alloca [64 x i32], align 16 -// IR-PCH-OMP60-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: store i32 0, ptr [[X]], align 4 -// IR-PCH-OMP60-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false) -// IR-PCH-OMP60-NEXT: store ptr @.str, ptr [[DOTCAPTURE_EXPR_]], align 8 -// IR-PCH-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8, !nonnull [[META3:![0-9]+]] -// IR-PCH-OMP60-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// IR-PCH-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 -// IR-PCH-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META3]] -// IR-PCH-OMP60-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47(ptr [[DEVICE_RESULT]], ptr [[TMP1]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] -// IR-PCH-OMP60-NEXT: ret i32 0 -// -// -// IR-PCH-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47 -// IR-PCH-OMP60-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2:[0-9]+]] { -// IR-PCH-OMP60-NEXT: entry: -// IR-PCH-OMP60-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// IR-PCH-OMP60-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 -// IR-PCH-OMP60-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-PCH-OMP60-NEXT: store ptr 
[[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// IR-PCH-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8, !nonnull [[META3]], !align [[META4:![0-9]+]] -// IR-PCH-OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 8, !nonnull [[META3]] -// IR-PCH-OMP60-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// IR-PCH-OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META3]] -// IR-PCH-OMP60-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP3]], i64 0, i64 0 -// IR-PCH-OMP60-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB2]], i32 [[TMP0]], i32 64, i32 1, ptr [[ARRAYDECAY]]) -// IR-PCH-OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-PCH-OMP60-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47.omp_outlined, ptr [[TMP1]], ptr [[TMP4]]) -// IR-PCH-OMP60-NEXT: ret void -// -// -// IR-PCH-OMP60-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l47.omp_outlined -// IR-PCH-OMP60-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] { -// IR-PCH-OMP60-NEXT: entry: -// IR-PCH-OMP60-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-OMP60-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// IR-PCH-OMP60-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// IR-PCH-OMP60-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// IR-PCH-OMP60-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// IR-PCH-OMP60-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// 
IR-PCH-OMP60-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// IR-PCH-OMP60-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-PCH-OMP60-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-OMP60-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// IR-PCH-OMP60-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 -// IR-PCH-OMP60-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 -// IR-PCH-OMP60-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8, !nonnull [[META3]], !align [[META4]] -// IR-PCH-OMP60-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// IR-PCH-OMP60-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-OMP60-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-OMP60-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-OMP60-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// IR-PCH-OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 -// IR-PCH-OMP60-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr [[TMP3]]) -// IR-PCH-OMP60-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-PCH-OMP60-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-OMP60-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 63 -// IR-PCH-OMP60-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR-PCH-OMP60: cond.true: -// IR-PCH-OMP60-NEXT: br label [[COND_END:%.*]] -// IR-PCH-OMP60: cond.false: -// IR-PCH-OMP60-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-OMP60-NEXT: br label [[COND_END]] -// IR-PCH-OMP60: cond.end: -// IR-PCH-OMP60-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// 
IR-PCH-OMP60-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// IR-PCH-OMP60-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// IR-PCH-OMP60-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// IR-PCH-OMP60-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR-PCH-OMP60: omp.inner.for.cond: -// IR-PCH-OMP60-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-PCH-OMP60-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-OMP60-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// IR-PCH-OMP60-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// IR-PCH-OMP60: omp.inner.for.cond.cleanup: -// IR-PCH-OMP60-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// IR-PCH-OMP60: omp.inner.for.body: -// IR-PCH-OMP60-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-PCH-OMP60-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// IR-PCH-OMP60-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// IR-PCH-OMP60-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// IR-PCH-OMP60-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv() -// IR-PCH-OMP60-NEXT: store i32 [[CALL]], ptr [[DOTX__VOID_ADDR]], align 4 -// IR-PCH-OMP60-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// IR-PCH-OMP60-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4 -// IR-PCH-OMP60-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// IR-PCH-OMP60-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// IR-PCH-OMP60-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// IR-PCH-OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// IR-PCH-OMP60-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 -// IR-PCH-OMP60-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// IR-PCH-OMP60: omp.body.continue: -// IR-PCH-OMP60-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// IR-PCH-OMP60: omp.inner.for.inc: -// IR-PCH-OMP60-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// IR-PCH-OMP60-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// IR-PCH-OMP60-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 -// IR-PCH-OMP60-NEXT: br label [[OMP_INNER_FOR_COND]] -// IR-PCH-OMP60: omp.inner.for.end: -// IR-PCH-OMP60-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// IR-PCH-OMP60: omp.loop.exit: -// IR-PCH-OMP60-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// IR-PCH-OMP60-NEXT: [[TMP14:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 -// IR-PCH-OMP60-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr [[TMP14]]) -// IR-PCH-OMP60-NEXT: ret void -// diff --git a/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp deleted file mode 100644 index 9e319e4..0000000 --- a/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp +++ /dev/null @@ -1,2956 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ -// Test host codegen. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3 - -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" -// RUN: 
%clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" - -// Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK9 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK11 -// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch 
-fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK11 - -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch 
-fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" - -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - - -// We have 6 target regions - -// Check target registration is registered as a Ctor. - -// Check that the offloading functions are emitted and that the parallel function -// is appropriately guarded. - - -template<typename tx> -tx ftemplate(int n) { - tx a = 0; - char str[] = "msg"; - const char *str1 = "msg1"; - - #pragma omp target parallel num_threads(strict: tx(20)) severity(warning) message(str) - { - } - - #pragma omp target parallel num_threads(strict: 42) severity(warning) message("msg_literal") - { - } - - short b = 1; - #pragma omp target parallel num_threads(strict: b) severity(fatal) message(str1) - { - a += b; - } - - return a; -} - -static -int fstatic(int n, const char *str2) { - char str[] = "msg"; - const char *str1 = "msg1"; - - #pragma omp target parallel num_threads(strict: n) severity(warning) message(str) - { - } - - #pragma omp target parallel num_threads(strict: n) severity(fatal) message("msg_literal") - { - } - - #pragma omp target parallel num_threads(strict: 32+n) severity(fatal) message(str1) - { - } - - #pragma omp target parallel num_threads(strict: 32+n) severity(warning) message(str2) - { - } - - return n+1; -} - -struct S1 { - double a; - - int r1(int n){ - int b = 1; - char str[] = "msg"; - const char *str1 = "msg1"; - - #pragma omp target parallel num_threads(strict: n-b) severity(warning) message(str) - { - this->a = (double)b + 1.5; - } - - #pragma omp target parallel num_threads(strict: 1024) severity(fatal) message(str1) - { - this->a = 2.5; - } - - #pragma omp target parallel 
num_threads(strict: n) severity(fatal) message("msg_literal") - { - this->a = 2.5; - } - - return (int)a; - } -}; - -int bar(int n){ - int a = 0; - const char *str = "msg_arg"; - - S1 S; - a += S.r1(n); - - a += fstatic(n, str); - - a += ftemplate<int>(n); - - return a; -} - -#endif -// CHECK1-LABEL: define {{[^@]+}}@_Z3bari -// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[STR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 -// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: store ptr @.str, ptr [[STR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP0]]) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CALL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR]], align 8 -// CHECK1-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZL7fstaticiPKc(i32 noundef signext [[TMP2]], ptr noundef [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP5]]) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[A]], align 4 -// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 -// CHECK1-NEXT: ret i32 [[TMP7]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei -// CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[STR:%.*]] = alloca [4 x i8], align 1 -// CHECK1-NEXT: [[STR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP13:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED14:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS16:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS17:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: 
[[DOTOFFLOAD_MAPPERS18:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i32 1, ptr [[B]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR]], ptr align 1 @__const._ZN2S12r1Ei.str, i64 4, i1 false) -// CHECK1-NEXT: store ptr @.str.1, ptr [[STR1]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP0]], [[TMP1]] -// CHECK1-NEXT: store i32 [[SUB]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store ptr [[STR]], ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 8, !nonnull [[META23:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr 
[[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = 
insertvalue [3 x i32] zeroinitializer, i32 [[TMP22]], 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP24]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP34]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP23]], ptr [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP36]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK1-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] -// CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104(ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP7]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] -// CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[STR1]], align 8 -// CHECK1-NEXT: store ptr [[TMP39]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A4]], ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP43]], align 8 -// 
CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP40]], ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP49]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP50]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 6 -// 
CHECK1-NEXT: store ptr null, ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [i32 1024, i32 0, i32 0], ptr [[TMP60]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP61]], align 4 -// CHECK1-NEXT: [[TMP62:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1024, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.region_id, ptr [[KERNEL_ARGS8]]) -// CHECK1-NEXT: [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0 -// CHECK1-NEXT: br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED9:%.*]], label [[OMP_OFFLOAD_CONT10:%.*]] -// CHECK1: omp_offload.failed9: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109(ptr [[THIS1]], ptr [[TMP40]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT10]] -// CHECK1: omp_offload.cont10: -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP64]], ptr [[DOTCAPTURE_EXPR_11]], align 4 -// CHECK1-NEXT: store ptr @.str.4, ptr 
[[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_12]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP65]], ptr [[_TMP13]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4 -// CHECK1-NEXT: store i32 [[TMP66]], ptr [[DOTCAPTURE_EXPR__CASTED14]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED14]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[_TMP13]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[A15:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP69]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A15]], ptr [[TMP70]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP67]], ptr [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP67]], ptr [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP68]], ptr [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP68]], ptr [[TMP76]], align 8 -// CHECK1-NEXT: 
[[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP80]], 0 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP82]], align 4 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP83]], align 4 -// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP78]], ptr [[TMP84]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP85]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP86]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP87]], align 8 -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 
0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP91]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP92]], align 4 -// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP81]], ptr [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP94]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP80]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.region_id, ptr [[KERNEL_ARGS19]]) -// CHECK1-NEXT: [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0 -// CHECK1-NEXT: br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED20:%.*]], label [[OMP_OFFLOAD_CONT21:%.*]] -// CHECK1: omp_offload.failed20: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114(ptr [[THIS1]], i64 [[TMP67]], ptr [[TMP68]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT21]] -// CHECK1: omp_offload.cont21: -// CHECK1-NEXT: [[A22:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP97:%.*]] = load double, ptr [[A22]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP97]] to i32 -// CHECK1-NEXT: ret i32 [[CONV]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@_ZL7fstaticiPKc -// CHECK1-SAME: (i32 
noundef signext [[N:%.*]], ptr noundef [[STR2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[STR2_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[STR:%.*]] = alloca [4 x i8], align 1 -// CHECK1-NEXT: [[STR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED5:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS7:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS8:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS9:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED14:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS16:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_21:%.*]] = alloca 
i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_23:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED24:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS25:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS26:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS27:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[STR2]], ptr [[STR2_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR]], ptr align 1 @__const._ZL7fstaticiPKc.str, i64 4, i1 false) -// CHECK1-NEXT: store ptr @.str.1, ptr [[STR1]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store ptr [[STR]], ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_1]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP13]], 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP20]], align 8 -// CHECK1-NEXT: 
[[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP14]], ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP13]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] -// CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77(i64 [[TMP3]], ptr [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] -// CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store 
i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store ptr @.str.4, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP31]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR__CASTED5]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED5]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP4]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP33]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP33]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS8]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS8]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP43]], 0 
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP45]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP46]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP41]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP42]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS9]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP44]], ptr [[TMP56]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP57]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP43]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.region_id, ptr [[KERNEL_ARGS9]]) -// CHECK1-NEXT: [[TMP59:%.*]] = icmp ne i32 [[TMP58]], 0 -// CHECK1-NEXT: br i1 [[TMP59]], label [[OMP_OFFLOAD_FAILED10:%.*]], label [[OMP_OFFLOAD_CONT11:%.*]] -// CHECK1: omp_offload.failed10: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81(i64 [[TMP33]], ptr [[TMP34]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT11]] -// CHECK1: omp_offload.cont11: -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 32, [[TMP60]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTCAPTURE_EXPR_12]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[STR1]], align 8 -// CHECK1-NEXT: store ptr [[TMP61]], ptr [[DOTCAPTURE_EXPR_13]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4 -// CHECK1-NEXT: store i32 [[TMP62]], ptr [[DOTCAPTURE_EXPR__CASTED14]], align 4 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED14]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_13]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP63]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [2 
x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP63]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS17]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP69]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS17]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP70]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4 -// CHECK1-NEXT: [[TMP74:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP73]], 0 -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP75]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP76]], align 4 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP72]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP79]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP80]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP81]], align 8 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP83]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP84]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP85]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP74]], ptr [[TMP86]], align 4 -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP87]], align 4 -// CHECK1-NEXT: [[TMP88:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP73]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.region_id, ptr [[KERNEL_ARGS18]]) -// CHECK1-NEXT: [[TMP89:%.*]] = icmp ne i32 [[TMP88]], 0 -// CHECK1-NEXT: br i1 [[TMP89]], label 
[[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]] -// CHECK1: omp_offload.failed19: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85(i64 [[TMP63]], ptr [[TMP64]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT20]] -// CHECK1: omp_offload.cont20: -// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 32, [[TMP90]] -// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = load ptr, ptr [[STR2_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP91]], ptr [[DOTCAPTURE_EXPR_23]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK1-NEXT: store i32 [[TMP92]], ptr [[DOTCAPTURE_EXPR__CASTED24]], align 4 -// CHECK1-NEXT: [[TMP93:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED24]], align 8 -// CHECK1-NEXT: [[TMP94:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_23]], align 8 -// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP93]], ptr [[TMP95]], align 8 -// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS26]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP93]], ptr [[TMP96]], align 8 -// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP97]], align 8 -// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP94]], ptr [[TMP98]], align 8 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS26]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP94]], ptr [[TMP99]], align 8 -// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP100]], align 8 -// 
CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS26]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP103:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP103]], 0 -// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP105]], align 4 -// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP106]], align 4 -// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP101]], ptr [[TMP107]], align 8 -// CHECK1-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP102]], ptr [[TMP108]], align 8 -// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.13, ptr [[TMP109]], align 8 -// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP110]], align 8 -// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP111]], align 8 -// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP112]], align 8 -// CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP113]], align 8 -// CHECK1-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP114]], align 8 -// CHECK1-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP115]], align 4 -// CHECK1-NEXT: [[TMP116:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP104]], ptr [[TMP116]], align 4 -// CHECK1-NEXT: [[TMP117:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP117]], align 4 -// CHECK1-NEXT: [[TMP118:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP103]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.region_id, ptr [[KERNEL_ARGS28]]) -// CHECK1-NEXT: [[TMP119:%.*]] = icmp ne i32 [[TMP118]], 0 -// CHECK1-NEXT: br i1 [[TMP119]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]] -// CHECK1: omp_offload.failed29: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89(i64 [[TMP93]], ptr [[TMP94]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT30]] -// CHECK1: omp_offload.cont30: -// CHECK1-NEXT: [[TMP120:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[TMP120]], 1 -// CHECK1-NEXT: ret i32 [[ADD31]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i -// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[STR:%.*]] = alloca [4 x 
i8], align 1 -// CHECK1-NEXT: [[STR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS12:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR]], ptr align 1 @__const._Z9ftemplateIiET_i.str, i64 4, i1 false) -// CHECK1-NEXT: store ptr @.str.1, ptr [[STR1]], align 8 -// CHECK1-NEXT: store ptr [[STR]], ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DOTCAPTURE_EXPR_]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.15, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr 
@.offload_maptypes.16, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [i32 20, i32 0, i32 0], ptr [[TMP18]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 20, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] -// CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55(ptr [[TMP1]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] -// CHECK1: omp_offload.cont: -// CHECK1-NEXT: store ptr 
@.str.4, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_1]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP2]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP29]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP30]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.17, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP39]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [i32 42, i32 0, i32 0], ptr [[TMP40]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP41]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 42, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.region_id, ptr [[KERNEL_ARGS6]]) -// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] -// CHECK1: omp_offload.failed7: -// CHECK1-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59(ptr [[TMP23]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] -// CHECK1: omp_offload.cont8: -// CHECK1-NEXT: store i16 1, ptr [[B]], align 2 -// CHECK1-NEXT: [[TMP44:%.*]] = load i16, ptr [[B]], align 2 -// CHECK1-NEXT: store i16 [[TMP44]], ptr [[DOTCAPTURE_EXPR_9]], align 2 -// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[STR1]], align 8 -// CHECK1-NEXT: store ptr [[TMP45]], ptr [[DOTCAPTURE_EXPR_10]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4 -// CHECK1-NEXT: store i32 [[TMP46]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load i16, ptr [[B]], align 2 -// CHECK1-NEXT: store i16 [[TMP48]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_9]], align 2 -// CHECK1-NEXT: store i16 [[TMP50]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2 -// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP47]], ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP47]], ptr [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP49]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 1 -// 
CHECK1-NEXT: store i64 [[TMP49]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP51]], ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP51]], ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP67:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_9]], align 2 -// CHECK1-NEXT: [[TMP68:%.*]] = zext i16 [[TMP67]] to i32 -// CHECK1-NEXT: [[TMP69:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP68]], 0 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP70]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 -// 
CHECK1-NEXT: store i32 4, ptr [[TMP71]], align 4 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP65]], ptr [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.19, ptr [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP76]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP79]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP69]], ptr [[TMP81]], align 4 -// 
CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP82]], align 4 -// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP68]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.region_id, ptr [[KERNEL_ARGS14]]) -// CHECK1-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 -// CHECK1-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] -// CHECK1: omp_offload.failed15: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64(i64 [[TMP47]], i64 [[TMP49]], i64 [[TMP51]], ptr [[TMP52]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT16]] -// CHECK1: omp_offload.cont16: -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[A]], align 4 -// CHECK1-NEXT: ret i32 [[TMP85]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104 -// CHECK1-SAME: (ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// 
CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 1, ptr [[ARRAYDECAY]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.omp_outlined, ptr [[TMP1]], i64 [[TMP6]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], 
align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109 -// CHECK1-SAME: (ptr noundef [[THIS:%.*]], ptr noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr [[TMP2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.omp_outlined, ptr [[TMP1]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114 -// CHECK1-SAME: (ptr noundef [[THIS:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// 
CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 2, ptr [[ARRAYDECAY]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.omp_outlined, ptr [[TMP1]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77 -// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(4) 
[[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1, ptr [[ARRAYDECAY]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.omp_outlined) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81 -// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr 
[[ARRAYDECAY]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.omp_outlined) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85 -// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr [[TMP2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.omp_outlined) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89 -// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 1, ptr [[TMP2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.omp_outlined) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55 -// CHECK1-SAME: (ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 1, ptr [[ARRAYDECAY]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK1-SAME: (ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META23]] -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 42, i32 1, ptr [[ARRAYDECAY]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.omp_outlined) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64 -// CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: call 
void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP6]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP7]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_Z3bari -// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] 
{ -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[STR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK3-NEXT: store ptr @.str, ptr [[STR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP0]]) -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CALL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR]], align 4 -// CHECK3-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZL7fstaticiPKc(i32 noundef [[TMP2]], ptr noundef [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[CALL3:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP5]]) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[A]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 -// CHECK3-NEXT: ret i32 [[TMP7]] -// -// -// CHECK3-LABEL: define {{[^@]+}}@_ZN2S12r1Ei -// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B:%.*]] = alloca 
i32, align 4 -// CHECK3-NEXT: [[STR:%.*]] = alloca [4 x i8], align 1 -// CHECK3-NEXT: [[STR1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[_TMP13:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED14:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS16:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS17:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS18:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[B]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STR]], ptr align 
1 @__const._ZN2S12r1Ei.str, i32 4, i1 false) -// CHECK3-NEXT: store ptr @.str.1, ptr [[STR1]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP0]], [[TMP1]] -// CHECK3-NEXT: store i32 [[SUB]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store ptr [[STR]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 4, !nonnull [[META24:![0-9]+]] -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: 
store i32 [[TMP4]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP22]], 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 4, ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP32]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP33]], align 8 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [[TMP23]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 
-// CHECK3-NEXT: store i32 0, ptr [[TMP36]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK3-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] -// CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104(ptr [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP7]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] -// CHECK3: omp_offload.cont: -// CHECK3-NEXT: [[TMP39:%.*]] = load ptr, ptr [[STR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP39]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A4]], ptr [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP43]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP40]], ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr 
[[TMP46]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP49]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP48]], ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP56]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP57]], align 8 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP58]], align 8 -// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [i32 1024, i32 0, i32 0], ptr [[TMP60]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP62:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1024, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.region_id, ptr [[KERNEL_ARGS8]]) -// CHECK3-NEXT: [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0 -// CHECK3-NEXT: br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED9:%.*]], label [[OMP_OFFLOAD_CONT10:%.*]] -// CHECK3: omp_offload.failed9: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109(ptr [[THIS1]], ptr [[TMP40]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT10]] -// CHECK3: omp_offload.cont10: -// CHECK3-NEXT: [[TMP64:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP64]], ptr [[DOTCAPTURE_EXPR_11]], align 4 -// CHECK3-NEXT: store ptr @.str.4, ptr [[DOTCAPTURE_EXPR_12]], align 4 -// CHECK3-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_12]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP65]], ptr [[_TMP13]], align 4 -// CHECK3-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4 -// CHECK3-NEXT: store i32 [[TMP66]], ptr [[DOTCAPTURE_EXPR__CASTED14]], align 4 -// CHECK3-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED14]], align 4 -// CHECK3-NEXT: [[TMP68:%.*]] = load ptr, ptr 
[[_TMP13]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[A15:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP69]], align 4 -// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A15]], ptr [[TMP70]], align 4 -// CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS18]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP71]], align 4 -// CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 1 -// CHECK3-NEXT: store i32 [[TMP67]], ptr [[TMP72]], align 4 -// CHECK3-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 1 -// CHECK3-NEXT: store i32 [[TMP67]], ptr [[TMP73]], align 4 -// CHECK3-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS18]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP74]], align 4 -// CHECK3-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP68]], ptr [[TMP75]], align 4 -// CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP68]], ptr [[TMP76]], align 4 -// CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS18]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP77]], align 4 -// CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP80:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4 -// CHECK3-NEXT: [[TMP81:%.*]] = insertvalue 
[3 x i32] zeroinitializer, i32 [[TMP80]], 0 -// CHECK3-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP82]], align 4 -// CHECK3-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 -// CHECK3-NEXT: store i32 3, ptr [[TMP83]], align 4 -// CHECK3-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP78]], ptr [[TMP84]], align 4 -// CHECK3-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP79]], ptr [[TMP85]], align 4 -// CHECK3-NEXT: [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP86]], align 4 -// CHECK3-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP87]], align 4 -// CHECK3-NEXT: [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP88]], align 4 -// CHECK3-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP89]], align 4 -// CHECK3-NEXT: [[TMP90:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP90]], align 8 -// CHECK3-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP91]], align 8 -// CHECK3-NEXT: [[TMP92:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP92]], align 4 -// CHECK3-NEXT: [[TMP93:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [[TMP81]], ptr [[TMP93]], align 4 -// CHECK3-NEXT: [[TMP94:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP94]], align 4 -// CHECK3-NEXT: [[TMP95:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP80]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.region_id, ptr [[KERNEL_ARGS19]]) -// CHECK3-NEXT: [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0 -// CHECK3-NEXT: br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED20:%.*]], label [[OMP_OFFLOAD_CONT21:%.*]] -// CHECK3: omp_offload.failed20: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114(ptr [[THIS1]], i32 [[TMP67]], ptr [[TMP68]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT21]] -// CHECK3: omp_offload.cont21: -// CHECK3-NEXT: [[A22:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP97:%.*]] = load double, ptr [[A22]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fptosi double [[TMP97]] to i32 -// CHECK3-NEXT: ret i32 [[CONV]] -// -// -// CHECK3-LABEL: define {{[^@]+}}@_ZL7fstaticiPKc -// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef [[STR2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[STR2_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[STR:%.*]] = alloca [4 x i8], align 1 -// CHECK3-NEXT: [[STR1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// 
CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS7:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS8:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS9:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED14:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS16:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_23:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED24:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS25:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS26:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS27:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: store i32 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[STR2]], ptr [[STR2_ADDR]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STR]], ptr align 1 @__const._ZL7fstaticiPKc.str, i32 4, i1 false) -// CHECK3-NEXT: store ptr @.str.1, ptr [[STR1]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store ptr [[STR]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_1]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP13]], 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// 
CHECK3-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP24]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [[TMP14]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP13]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] -// CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77(i32 [[TMP3]], ptr [[TMP4]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] -// CHECK3: omp_offload.cont: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store ptr @.str.4, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP31]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR__CASTED5]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR__CASTED5]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP4]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP36]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS8]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP34]], ptr [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS8]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP43]], 0 -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP46]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], 
i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP41]], ptr [[TMP47]], align 4 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP42]], ptr [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.9, ptr [[TMP49]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP51]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP53]], align 8 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP54]], align 8 -// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [[TMP44]], ptr [[TMP56]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP57]], align 4 
-// CHECK3-NEXT: [[TMP58:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP43]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.region_id, ptr [[KERNEL_ARGS9]]) -// CHECK3-NEXT: [[TMP59:%.*]] = icmp ne i32 [[TMP58]], 0 -// CHECK3-NEXT: br i1 [[TMP59]], label [[OMP_OFFLOAD_FAILED10:%.*]], label [[OMP_OFFLOAD_CONT11:%.*]] -// CHECK3: omp_offload.failed10: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81(i32 [[TMP33]], ptr [[TMP34]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT11]] -// CHECK3: omp_offload.cont11: -// CHECK3-NEXT: [[TMP60:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 32, [[TMP60]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTCAPTURE_EXPR_12]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = load ptr, ptr [[STR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP61]], ptr [[DOTCAPTURE_EXPR_13]], align 4 -// CHECK3-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4 -// CHECK3-NEXT: store i32 [[TMP62]], ptr [[DOTCAPTURE_EXPR__CASTED14]], align 4 -// CHECK3-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED14]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_13]], align 4 -// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP63]], ptr [[TMP65]], align 4 -// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP63]], ptr [[TMP66]], align 4 -// CHECK3-NEXT: [[TMP67:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS17]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP67]], align 4 -// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP64]], ptr [[TMP68]], align 4 -// CHECK3-NEXT: [[TMP69:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP64]], ptr [[TMP69]], align 4 -// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS17]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP70]], align 4 -// CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP73:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4 -// CHECK3-NEXT: [[TMP74:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP73]], 0 -// CHECK3-NEXT: [[TMP75:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP75]], align 4 -// CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP76]], align 4 -// CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP71]], ptr [[TMP77]], align 4 -// CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP72]], ptr [[TMP78]], align 4 -// CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP79]], align 4 -// CHECK3-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP80]], align 4 -// CHECK3-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr 
[[TMP81]], align 4 -// CHECK3-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP82]], align 4 -// CHECK3-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP83]], align 8 -// CHECK3-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP84]], align 8 -// CHECK3-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP85]], align 4 -// CHECK3-NEXT: [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [[TMP74]], ptr [[TMP86]], align 4 -// CHECK3-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP87]], align 4 -// CHECK3-NEXT: [[TMP88:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP73]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.region_id, ptr [[KERNEL_ARGS18]]) -// CHECK3-NEXT: [[TMP89:%.*]] = icmp ne i32 [[TMP88]], 0 -// CHECK3-NEXT: br i1 [[TMP89]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]] -// CHECK3: omp_offload.failed19: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85(i32 [[TMP63]], ptr [[TMP64]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT20]] -// CHECK3: omp_offload.cont20: -// CHECK3-NEXT: [[TMP90:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 32, [[TMP90]] -// CHECK3-NEXT: store i32 [[ADD22]], ptr [[DOTCAPTURE_EXPR_21]], align 4 -// 
CHECK3-NEXT: [[TMP91:%.*]] = load ptr, ptr [[STR2_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP91]], ptr [[DOTCAPTURE_EXPR_23]], align 4 -// CHECK3-NEXT: [[TMP92:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK3-NEXT: store i32 [[TMP92]], ptr [[DOTCAPTURE_EXPR__CASTED24]], align 4 -// CHECK3-NEXT: [[TMP93:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED24]], align 4 -// CHECK3-NEXT: [[TMP94:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_23]], align 4 -// CHECK3-NEXT: [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP93]], ptr [[TMP95]], align 4 -// CHECK3-NEXT: [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS26]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP93]], ptr [[TMP96]], align 4 -// CHECK3-NEXT: [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS27]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP97]], align 4 -// CHECK3-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP94]], ptr [[TMP98]], align 4 -// CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS26]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP94]], ptr [[TMP99]], align 4 -// CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS27]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP100]], align 4 -// CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP102:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS26]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP103:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK3-NEXT: [[TMP104:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP103]], 0 -// CHECK3-NEXT: [[TMP105:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 
0 -// CHECK3-NEXT: store i32 3, ptr [[TMP105]], align 4 -// CHECK3-NEXT: [[TMP106:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP106]], align 4 -// CHECK3-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP101]], ptr [[TMP107]], align 4 -// CHECK3-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP102]], ptr [[TMP108]], align 4 -// CHECK3-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.13, ptr [[TMP109]], align 4 -// CHECK3-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP110]], align 4 -// CHECK3-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP111]], align 4 -// CHECK3-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP112]], align 4 -// CHECK3-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP113]], align 8 -// CHECK3-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP114]], align 8 -// CHECK3-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP115]], 
align 4 -// CHECK3-NEXT: [[TMP116:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [[TMP104]], ptr [[TMP116]], align 4 -// CHECK3-NEXT: [[TMP117:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP117]], align 4 -// CHECK3-NEXT: [[TMP118:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP103]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.region_id, ptr [[KERNEL_ARGS28]]) -// CHECK3-NEXT: [[TMP119:%.*]] = icmp ne i32 [[TMP118]], 0 -// CHECK3-NEXT: br i1 [[TMP119]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]] -// CHECK3: omp_offload.failed29: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89(i32 [[TMP93]], ptr [[TMP94]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT30]] -// CHECK3: omp_offload.cont30: -// CHECK3-NEXT: [[TMP120:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[ADD31:%.*]] = add nsw i32 [[TMP120]], 1 -// CHECK3-NEXT: ret i32 [[ADD31]] -// -// -// CHECK3-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i -// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[STR:%.*]] = alloca [4 x i8], align 1 -// CHECK3-NEXT: [[STR1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: 
[[DOTCAPTURE_EXPR_1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [4 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS12:%.*]] = alloca [4 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [4 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STR]], ptr align 1 @__const._Z9ftemplateIiET_i.str, i32 4, i1 false) -// CHECK3-NEXT: store ptr @.str.1, ptr [[STR1]], align 4 -// CHECK3-NEXT: store ptr [[STR]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], 
ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.15, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.16, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [i32 20, i32 0, i32 0], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 20, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] -// CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55(ptr [[TMP1]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] -// CHECK3: omp_offload.cont: -// CHECK3-NEXT: store ptr @.str.4, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_1]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP22]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP2]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.17, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 -// CHECK3-NEXT: store ptr 
null, ptr [[TMP36]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP37]], align 8 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP38]], align 8 -// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [i32 42, i32 0, i32 0], ptr [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 42, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.region_id, ptr [[KERNEL_ARGS6]]) -// CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK3-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] -// CHECK3: omp_offload.failed7: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59(ptr [[TMP23]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT8]] -// CHECK3: omp_offload.cont8: -// CHECK3-NEXT: store i16 1, ptr [[B]], align 2 -// CHECK3-NEXT: [[TMP44:%.*]] = load i16, ptr [[B]], align 2 -// CHECK3-NEXT: store i16 [[TMP44]], ptr [[DOTCAPTURE_EXPR_9]], align 2 -// CHECK3-NEXT: [[TMP45:%.*]] = load ptr, ptr [[STR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP45]], ptr [[DOTCAPTURE_EXPR_10]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4 -// CHECK3-NEXT: 
store i32 [[TMP46]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP48:%.*]] = load i16, ptr [[B]], align 2 -// CHECK3-NEXT: store i16 [[TMP48]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_9]], align 2 -// CHECK3-NEXT: store i16 [[TMP50]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2 -// CHECK3-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP47]], ptr [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP47]], ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 1 -// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP56]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 1 -// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP57]], align 4 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP58]], align 4 -// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP51]], ptr [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP51]], ptr [[TMP60]], align 4 -// CHECK3-NEXT: 
[[TMP61:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP62:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP52]], ptr [[TMP62]], align 4 -// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP52]], ptr [[TMP63]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP64]], align 4 -// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP67:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_9]], align 2 -// CHECK3-NEXT: [[TMP68:%.*]] = zext i16 [[TMP67]] to i32 -// CHECK3-NEXT: [[TMP69:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP68]], 0 -// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP70]], align 4 -// CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 -// CHECK3-NEXT: store i32 4, ptr [[TMP71]], align 4 -// CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP65]], ptr [[TMP72]], align 4 -// CHECK3-NEXT: [[TMP73:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP66]], ptr [[TMP73]], align 4 -// CHECK3-NEXT: [[TMP74:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 -// CHECK3-NEXT: 
store ptr @.offload_sizes.19, ptr [[TMP74]], align 4 -// CHECK3-NEXT: [[TMP75:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP75]], align 4 -// CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP76]], align 4 -// CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP77]], align 4 -// CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP78]], align 8 -// CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP79]], align 8 -// CHECK3-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP80]], align 4 -// CHECK3-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [[TMP69]], ptr [[TMP81]], align 4 -// CHECK3-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP82]], align 4 -// CHECK3-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP68]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.region_id, ptr [[KERNEL_ARGS14]]) -// CHECK3-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 -// CHECK3-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] -// CHECK3: 
omp_offload.failed15: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64(i32 [[TMP47]], i32 [[TMP49]], i32 [[TMP51]], ptr [[TMP52]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT16]] -// CHECK3: omp_offload.cont16: -// CHECK3-NEXT: [[TMP85:%.*]] = load i32, ptr [[A]], align 4 -// CHECK3-NEXT: ret i32 [[TMP85]] -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104 -// CHECK3-SAME: (ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 
[[TMP0]], i32 [[TMP3]], i32 1, ptr [[ARRAYDECAY]]) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.omp_outlined, ptr [[TMP1]], i32 [[TMP6]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109 -// CHECK3-SAME: (ptr noundef [[THIS:%.*]], ptr noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: 
[[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr [[TMP2]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.omp_outlined, ptr [[TMP1]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114 -// CHECK3-SAME: (ptr noundef [[THIS:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 
dereferenceable(12) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 2, ptr [[ARRAYDECAY]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.omp_outlined, ptr [[TMP1]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77 -// CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], 
align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1, ptr [[ARRAYDECAY]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.omp_outlined) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81 -// CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4, !nonnull 
[[META24]] -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr [[ARRAYDECAY]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.omp_outlined) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85 -// CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr [[TMP2]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.omp_outlined) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89 -// CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 1, ptr [[TMP2]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.omp_outlined) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55 -// CHECK3-SAME: (ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 1, ptr [[ARRAYDECAY]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK3-SAME: (ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META24]] -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 42, i32 1, ptr [[ARRAYDECAY]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.omp_outlined) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64 -// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: call 
void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP6]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.omp_outlined, i32 [[TMP5]], i32 [[TMP7]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77 -// 
CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8, !nonnull [[META24:![0-9]+]] -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP3]], i64 0, i64 0 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1, ptr [[ARRAYDECAY]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.omp_outlined) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81 -// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr 
inbounds [12 x i8], ptr [[TMP3]], i64 0, i64 0 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr [[ARRAYDECAY]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.omp_outlined) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85 -// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], 
i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr [[TMP2]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.omp_outlined) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89 -// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 1, ptr [[TMP2]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.omp_outlined) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104 -// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, 
ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP4]], i64 0, i64 0 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 1, ptr [[ARRAYDECAY]]) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.omp_outlined, ptr [[TMP1]], i64 [[TMP6]]) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] 
= sitofp i32 [[TMP1]] to double -// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109 -// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], ptr noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr [[TMP2]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.omp_outlined, ptr [[TMP1]]) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114 -// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store 
i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP4]], i64 0, i64 0 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 2, ptr [[ARRAYDECAY]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.omp_outlined, ptr [[TMP1]]) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55 -// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 1, ptr [[ARRAYDECAY]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META24]] -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 42, i32 1, ptr [[ARRAYDECAY]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.omp_outlined) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64 -// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], 
align 2 -// CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr [[TMP3]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP6]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP7]]) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 
[[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4, !nonnull [[META25:![0-9]+]] -// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP3]], i32 0, i32 0 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1, ptr [[ARRAYDECAY]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.omp_outlined) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l77.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: 
[[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP3]], i32 0, i32 0 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr [[ARRAYDECAY]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.omp_outlined) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l81.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: 
call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr [[TMP2]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.omp_outlined) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l85.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 1, ptr [[TMP2]]) 
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.omp_outlined) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstaticiPKc_l89.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr 
[[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP4]], i32 0, i32 0 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 1, ptr [[ARRAYDECAY]]) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.omp_outlined, ptr [[TMP1]], i32 [[TMP6]]) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l104.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], 
align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], ptr noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr [[TMP2]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.omp_outlined, ptr [[TMP1]]) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l109.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 
4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP4]], i32 0, i32 0 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 2, ptr [[ARRAYDECAY]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.omp_outlined, ptr [[TMP1]]) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l114.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 -// CHECK11-NEXT: ret void -// 
-// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 1 dereferenceable(4) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 1, ptr [[ARRAYDECAY]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 1 dereferenceable(12) [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META25]] -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 42, i32 1, ptr [[ARRAYDECAY]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.omp_outlined) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64 -// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], ptr noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 2 -// CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr [[TMP3]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP6]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.omp_outlined, i32 [[TMP5]], i32 [[TMP7]]) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l64.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add 
nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: ret void -// diff --git a/clang/test/ParserHLSL/semantic_parsing.hlsl b/clang/test/ParserHLSL/semantic_parsing.hlsl index 34df180..726dead 100644 --- a/clang/test/ParserHLSL/semantic_parsing.hlsl +++ b/clang/test/ParserHLSL/semantic_parsing.hlsl @@ -1,7 +1,41 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s -verify +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -x hlsl -o - %s -verify // expected-error@+1 {{expected HLSL Semantic identifier}} void Entry(int GI : ) { } // expected-error@+1 {{unknown HLSL semantic 'SV_IWantAPony'}} void Pony(int GI : SV_IWantAPony) { } + +// expected-error@+3 {{expected HLSL Semantic identifier}} +// expected-error@+2 {{expected ')'}} +// expected-note@+1 {{to match this '('}} +void SuperPony(int GI : 0) { } + +// expected-error@+1 {{unknown HLSL semantic '_'}} +void MegaPony(int GI : _) { } + +// expected-error@+1 {{unknown HLSL semantic 'A0A'}} +void CoolPony(int GI : A0A0) { } + +// expected-error@+1 {{unknown HLSL semantic 'A_'}} +void NicePony(int GI : A_0) { } + +// expected-error@+1 {{unknown HLSL semantic 'A'}} +void CutePony(int GI : A00) { } + +// expected-error@+3 {{unknown HLSL semantic 'A'}} +// expected-error@+2 {{expected ')'}} +// expected-note@+1 {{to match this '('}} +void DoublePony(int GI : A00 B) { } + +// expected-error@+1 {{unknown HLSL semantic 'é'}} +void BigPony(int GI : é) { } + +// expected-error@+2 {{unexpected character <U+1F60A>}} +// expected-error@+1 {{expected HLSL Semantic identifier}} +void UTFPony(int GI : 😊) { } + +// expected-error@+2 {{character <U+1F60A> not allowed in an identifier}} +// expected-error@+1 {{unknown HLSL semantic 'PonyWithA😊'}} +void SmilingPony(int GI : PonyWithA😊) { } diff --git a/clang/test/ParserHLSL/semantic_parsing_define.hlsl 
b/clang/test/ParserHLSL/semantic_parsing_define.hlsl new file mode 100644 index 0000000..b42e2e6 --- /dev/null +++ b/clang/test/ParserHLSL/semantic_parsing_define.hlsl @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -x hlsl -o - %s -verify + +#define SomeDefine SV_IWantAPony + +// expected-error@7 {{unknown HLSL semantic 'SV_IWantAPony'}} +void Pony(int GI : SomeDefine) { } diff --git a/clang/test/Preprocessor/embed___has_embed_parsing_errors.c b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c index fcaf693..0591c59 100644 --- a/clang/test/Preprocessor/embed___has_embed_parsing_errors.c +++ b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c @@ -238,3 +238,12 @@ #if __has_embed("media/art.txt" if_empty)) #endif +// expected-error@+2 {{invalid value '-1'; must be positive}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed (__FILE__ limit(-1)) +#endif + +// expected-error@+2 {{invalid value '-100000000000000000'; must be positive}}\ + expected-error@+2 {{expected value in expression}} +#if __has_embed (__FILE__ limit(-100000000000000000)) != __STDC_EMBED_NOT_FOUND__ +#endif diff --git a/clang/test/Sema/patchable-function-entry-attr.cpp b/clang/test/Sema/patchable-function-entry-attr.cpp index 7498e67..97b9c26 100644 --- a/clang/test/Sema/patchable-function-entry-attr.cpp +++ b/clang/test/Sema/patchable-function-entry-attr.cpp @@ -8,7 +8,7 @@ // RUN: %clang_cc1 -triple riscv64 -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple powerpc-unknown-linux-gnu -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -fsyntax-only -verify=silence %s -// RUN: %clang_cc1 -triple ppc64le -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple ppc64le -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -fsyntax-only -verify=AIX %s // RUN: %clang_cc1 -triple 
powerpc-ibm-aix-xcoff -fsyntax-only -verify=AIX %s diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index 11437d0..7dac275 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -10,45 +10,55 @@ struct MyObj { // CHECK-LABEL: Function: return_local_addr MyObj* return_local_addr() { MyObj x {10}; - MyObj* p = &x; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_X:[0-9]+]], ToOrigin: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) + MyObj* p = &x; // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) return p; +// CHECK: Use ([[O_P]] (Decl: p), Read) // CHECK: AssignOrigin (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) // CHECK: ReturnOfOrigin ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) -// CHECK: Expire (LoanID: [[L_X]]) +// CHECK: Expire ([[L_X]] (Path: x)) } // Pointer Assignment and Return // CHECK-LABEL: Function: assign_and_return_local_addr -// CHECK-NEXT: Block B{{[0-9]+}}: MyObj* assign_and_return_local_addr() { MyObj y{20}; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_Y:[0-9]+]] (Path: y), ToOrigin: [[O_DRE_Y:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) MyObj* ptr1 = &y; -// CHECK: Issue (LoanID: [[L_Y:[0-9]+]], ToOrigin: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_PTR1:[0-9]+]] (Decl: ptr1), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_PTR1:[0-9]+]] (Decl: ptr1), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) MyObj* ptr2 = ptr1; -// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL:[0-9]+]] (Expr: 
ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) -// CHECK: AssignOrigin (Dest: [[O_PTR2:[0-9]+]] (Decl: ptr2), Src: [[O_PTR1_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: Use ([[O_PTR1]] (Decl: ptr1), Read) +// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: AssignOrigin (Dest: [[O_PTR2:[0-9]+]] (Decl: ptr2), Src: [[O_PTR1_RVAL]] (Expr: ImplicitCastExpr)) ptr2 = ptr1; -// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) -// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR1_RVAL_2]] (Expr: ImplicitCastExpr)) +// CHECK: Use ([[O_PTR1]] (Decl: ptr1), Read) +// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: Use ({{[0-9]+}} (Decl: ptr2), Write) +// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR1_RVAL_2]] (Expr: ImplicitCastExpr)) ptr2 = ptr2; // Self assignment. -// CHECK: AssignOrigin (Dest: [[O_PTR2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) -// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR2_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: Use ([[O_PTR2]] (Decl: ptr2), Read) +// CHECK: AssignOrigin (Dest: [[O_PTR2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: Use ([[O_PTR2]] (Decl: ptr2), Write) +// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR2_RVAL]] (Expr: ImplicitCastExpr)) return ptr2; -// CHECK: AssignOrigin (Dest: [[O_PTR2_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) -// CHECK: ReturnOfOrigin ([[O_PTR2_RVAL_2]] (Expr: ImplicitCastExpr)) -// CHECK: Expire (LoanID: [[L_Y]]) +// CHECK: Use ([[O_PTR2]] (Decl: ptr2), Read) +// CHECK: AssignOrigin (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: ReturnOfOrigin ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) +// CHECK: Expire 
([[L_Y]] (Path: y)) } // Return of Non-Pointer Type // CHECK-LABEL: Function: return_int_val -// CHECK-NEXT: Block B{{[0-9]+}}: int return_int_val() { int x = 10; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: {{[0-9]+}} (Expr: DeclRefExpr)) return x; } // CHECK-NEXT: End of Block @@ -56,25 +66,27 @@ int return_int_val() { // Loan Expiration (Automatic Variable, C++) // CHECK-LABEL: Function: loan_expires_cpp -// CHECK-NEXT: Block B{{[0-9]+}}: void loan_expires_cpp() { MyObj obj{1}; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_OBJ:[0-9]+]] (Path: obj), ToOrigin: [[O_DRE_OBJ:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OBJ]] (Expr: DeclRefExpr)) MyObj* pObj = &obj; -// CHECK: Issue (LoanID: [[L_OBJ:[0-9]+]], ToOrigin: [[O_ADDR_OBJ:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_POBJ:[0-9]+]] (Decl: pObj), Src: [[O_ADDR_OBJ]] (Expr: UnaryOperator)) -// CHECK: Expire (LoanID: [[L_OBJ]]) +// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: pObj), Src: [[O_ADDR_OBJ]] (Expr: UnaryOperator)) +// CHECK: Expire ([[L_OBJ]] (Path: obj)) } // FIXME: No expire for Trivial Destructors // CHECK-LABEL: Function: loan_expires_trivial -// CHECK-NEXT: Block B{{[0-9]+}}: void loan_expires_trivial() { int trivial_obj = 1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_TRIVIAL_OBJ:[0-9]+]] (Path: trivial_obj), ToOrigin: [[O_DRE_TRIVIAL:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_TRIVIAL]] (Expr: DeclRefExpr)) int* pTrivialObj = &trivial_obj; -// CHECK: Issue (LoanID: [[L_TRIVIAL_OBJ:[0-9]+]], ToOrigin: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_PTOBJ:[0-9]+]] (Decl: pTrivialObj), Src: [[O_ADDR_TRIVIAL_OBJ]] (Expr: UnaryOperator)) -// CHECK-NOT: Expire (LoanID: [[L_TRIVIAL_OBJ]]) +// CHECK: AssignOrigin (Dest: {{[0-9]+}} 
(Decl: pTrivialObj), Src: [[O_ADDR_TRIVIAL_OBJ]] (Expr: UnaryOperator)) +// CHECK-NOT: Expire // CHECK-NEXT: End of Block // FIXME: Add check for Expire once trivial destructors are handled for expiration. } @@ -86,16 +98,22 @@ void conditional(bool condition) { int* p = nullptr; if (condition) +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) p = &a; -// CHECK: Issue (LoanID: [[L_A:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) else +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_B:[0-9]+]] (Path: b), ToOrigin: [[O_DRE_B:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_B:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_B]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_B]] (Expr: UnaryOperator)) p = &b; -// CHECK: Issue (LoanID: [[L_B:[0-9]+]], ToOrigin: [[O_ADDR_B:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_B]] (Expr: UnaryOperator)) +// CHECK: Block B{{[0-9]+}}: int *q = p; -// CHECK: AssignOrigin (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) -// CHECK: AssignOrigin (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: Use ([[O_P]] (Decl: p), Read) +// CHECK: AssignOrigin (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: AssignOrigin (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) } @@ -109,26 +127,36 @@ void pointers_in_a_cycle(bool condition) { MyObj* p2 = &v2; MyObj* p3 = &v3; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue 
(LoanID: [[L_V1:[0-9]+]], ToOrigin: [[O_ADDR_V1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_V1:[0-9]+]] (Path: v1), ToOrigin: [[O_DRE_V1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_V1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P1:[0-9]+]] (Decl: p1), Src: [[O_ADDR_V1]] (Expr: UnaryOperator)) -// CHECK: Issue (LoanID: [[L_V2:[0-9]+]], ToOrigin: [[O_ADDR_V2:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_V2:[0-9]+]] (Path: v2), ToOrigin: [[O_DRE_V2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_V2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V2]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P2:[0-9]+]] (Decl: p2), Src: [[O_ADDR_V2]] (Expr: UnaryOperator)) -// CHECK: Issue (LoanID: [[L_V3:[0-9]+]], ToOrigin: [[O_ADDR_V3:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_V3:[0-9]+]] (Path: v3), ToOrigin: [[O_DRE_V3:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_V3:[0-g]+]] (Expr: UnaryOperator), Src: [[O_DRE_V3]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P3:[0-9]+]] (Decl: p3), Src: [[O_ADDR_V3]] (Expr: UnaryOperator)) while (condition) { - MyObj* temp = p1; - p1 = p2; - p2 = p3; - p3 = temp; // CHECK: Block B{{[0-9]+}}: + MyObj* temp = p1; +// CHECK: Use ([[O_P1]] (Decl: p1), Read) // CHECK: AssignOrigin (Dest: [[O_P1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P1]] (Decl: p1)) // CHECK: AssignOrigin (Dest: [[O_TEMP:[0-9]+]] (Decl: temp), Src: [[O_P1_RVAL]] (Expr: ImplicitCastExpr)) + p1 = p2; +// CHECK: Use ([[O_P2:[0-9]+]] (Decl: p2), Read) // CHECK: AssignOrigin (Dest: [[O_P2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P2]] (Decl: p2)) +// CHECK: Use ({{[0-9]+}} (Decl: p1), Write) // CHECK: AssignOrigin (Dest: [[O_P1]] (Decl: p1), Src: [[O_P2_RVAL]] (Expr: ImplicitCastExpr)) + p2 = p3; +// CHECK: Use ([[O_P3:[0-9]+]] (Decl: p3), Read) // CHECK: AssignOrigin (Dest: 
[[O_P3_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P3]] (Decl: p3)) +// CHECK: Use ({{[0-9]+}} (Decl: p2), Write) // CHECK: AssignOrigin (Dest: [[O_P2]] (Decl: p2), Src: [[O_P3_RVAL]] (Expr: ImplicitCastExpr)) + p3 = temp; +// CHECK: Use ([[O_TEMP]] (Decl: temp), Read) // CHECK: AssignOrigin (Dest: [[O_TEMP_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_TEMP]] (Decl: temp)) +// CHECK: Use ({{[0-9]+}} (Decl: p3), Write) // CHECK: AssignOrigin (Dest: [[O_P3]] (Decl: p3), Src: [[O_TEMP_RVAL]] (Expr: ImplicitCastExpr)) } } @@ -137,28 +165,33 @@ void pointers_in_a_cycle(bool condition) { void overwrite_origin() { MyObj s1; MyObj s2; - MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) + MyObj* p = &s1; +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = &s2; -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) -// CHECK: Expire (LoanID: [[L_S2]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S2]] (Path: s2)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } // CHECK-LABEL: Function: reassign_to_null void reassign_to_null() { MyObj s1; - MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) + MyObj* p = &s1; +// CHECK: Issue ([[L_S1:[0-9]+]] 
(Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = nullptr; // CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: {{[0-9]+}} (Expr: CXXNullPtrLiteralExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S1]] (Path: s1)) } // FIXME: Have a better representation for nullptr than just an empty origin. // It should be a separate loan and origin kind. @@ -170,17 +203,20 @@ void reassign_in_if(bool condition) { MyObj s2; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) if (condition) { - p = &s2; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) + p = &s2; +// CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) } // CHECK: Block B{{[0-9]+}}: -// CHECK: Expire (LoanID: [[L_S2]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S2]] (Path: s2)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } @@ -195,42 
+231,51 @@ void assign_in_switch(int mode) { // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) switch (mode) { case 1: +// CHECK-DAG: Block B{{[0-9]+}}: p = &s1; -// CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) +// CHECK-DAG: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK-DAG: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK-DAG: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK-DAG: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) break; case 2: +// CHECK-DAG: Block B{{[0-9]+}}: p = &s2; -// CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) +// CHECK-DAG: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK-DAG: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK-DAG: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK-DAG: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; default: - p = &s3; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S3:[0-9]+]], ToOrigin: [[O_ADDR_S3:[0-9]+]] (Expr: UnaryOperator)) + p = &s3; +// CHECK: Issue ([[L_S3:[0-9]+]] (Path: s3), ToOrigin: [[O_DRE_S3:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S3:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S3]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S3]] (Expr: UnaryOperator)) break; } // CHECK: Block B{{[0-9]+}}: -// 
CHECK-DAG: Expire (LoanID: [[L_S3]]) -// CHECK-DAG: Expire (LoanID: [[L_S2]]) -// CHECK-DAG: Expire (LoanID: [[L_S1]]) +// CHECK-DAG: Expire ([[L_S3]] (Path: s3)) +// CHECK-DAG: Expire ([[L_S2]] (Path: s2)) +// CHECK-DAG: Expire ([[L_S1]] (Path: s1)) } // CHECK-LABEL: Function: loan_in_loop void loan_in_loop(bool condition) { MyObj* p = nullptr; - // CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) - // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) while (condition) { MyObj inner; - p = &inner; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], ToOrigin: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator)) + p = &inner; +// CHECK: Issue ([[L_INNER:[0-9]+]] (Path: inner), ToOrigin: [[O_DRE_INNER:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) -// CHECK: Expire (LoanID: [[L_INNER]]) +// CHECK: Expire ([[L_INNER]] (Path: inner)) } } @@ -240,20 +285,23 @@ void loop_with_break(int count) { MyObj s2; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), 
Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) for (int i = 0; i < count; ++i) { if (i == 5) { - p = &s2; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) + p = &s2; +// CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; } } // CHECK: Block B{{[0-9]+}}: -// CHECK: Expire (LoanID: [[L_S2]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S2]] (Path: s2)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } // CHECK-LABEL: Function: nested_scopes @@ -265,32 +313,36 @@ void nested_scopes() { { MyObj outer; p = &outer; -// CHECK: Issue (LoanID: [[L_OUTER:[0-9]+]], ToOrigin: [[O_ADDR_OUTER:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_OUTER:[0-9]+]] (Path: outer), ToOrigin: [[O_DRE_OUTER:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_OUTER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OUTER]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_OUTER]] (Expr: UnaryOperator)) { MyObj inner; p = &inner; -// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], ToOrigin: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_INNER:[0-9]+]] (Path: inner), ToOrigin: [[O_DRE_INNER:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) } -// CHECK: Expire (LoanID: [[L_INNER]]) +// CHECK: Expire ([[L_INNER]] (Path: inner)) } -// CHECK: Expire (LoanID: [[L_OUTER]]) +// CHECK: 
Expire ([[L_OUTER]] (Path: outer)) } // CHECK-LABEL: Function: pointer_indirection void pointer_indirection() { int a; int *p = &a; -// CHECK: Block B1: -// CHECK: Issue (LoanID: [[L_A:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) int **pp = &p; -// CHECK: Issue (LoanID: [[L_P:[0-g]+]], ToOrigin: [[O_ADDR_P:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_PP:[0-9]+]] (Decl: pp), Src: [[O_ADDR_P]] (Expr: UnaryOperator)) - -// FIXME: The Origin for the RHS is broken +// Note: No facts are generated for &p because the subexpression is a pointer type, +// which is not yet supported by the origin model. This is expected. int *q = *pp; +// CHECK: Use ([[O_PP:[0-9]+]] (Decl: pp), Read) // CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: q), Src: {{[0-9]+}} (Expr: ImplicitCastExpr)) } @@ -300,41 +352,41 @@ void ternary_operator() { int a, b; int *p; p = (a > b) ? 
&a : &b; - // CHECK: Block B2: - // CHECK: Issue (LoanID: [[L_A:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) - // CHECK: End of Block - - // CHECK: Block B3: - // CHECK: Issue (LoanID: [[L_B:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) - // CHECK: End of Block - - // CHECK: Block B1: - // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: {{[0-9]+}} (Expr: ConditionalOperator)) - // CHECK: End of Block +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) + +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_B:[0-9]+]] (Path: b), ToOrigin: [[O_DRE_B:[0-9]+]] (Expr: DeclRefExpr)) + +// CHECK: Block B{{[0-9]+}}: +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: p), Src: {{[0-9]+}} (Expr: ConditionalOperator)) } // CHECK-LABEL: Function: test_use_facts void usePointer(MyObj*); void test_use_facts() { - // CHECK: Block B{{[0-9]+}}: MyObj x; MyObj *p; +// CHECK: Block B{{[0-9]+}}: p = &x; - // CHECK: Use ([[O_P:[0-9]+]] (Decl: p) Write) +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: Use ([[O_P:[0-9]+]] (Decl: p), Write) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) (void)*p; - // CHECK: Use ([[O_P]] (Decl: p) Read) +// CHECK: Use ([[O_P]] (Decl: p), Read) usePointer(p); - // CHECK: Use ([[O_P]] (Decl: p) Read) +// CHECK: Use ([[O_P]] (Decl: p), Read) p->id = 1; - // CHECK: Use ([[O_P]] (Decl: p) Read) - - +// CHECK: Use ([[O_P]] (Decl: p), Read) MyObj* q; q = p; - // CHECK: Use ([[O_P]] (Decl: p) Read) - // CHECK: Use ([[O_Q:[0-9]+]] (Decl: q) Write) +// CHECK: Use ([[O_P]] (Decl: p), Read) +// CHECK: Use ([[O_Q:[0-9]+]] (Decl: q), Write) usePointer(q); - // CHECK: Use ([[O_Q]] (Decl: q) Read) +// CHECK: 
Use ([[O_Q]] (Decl: q), Read) q->id = 2; - // CHECK: Use ([[O_Q]] (Decl: q) Read) -} +// CHECK: Use ([[O_Q]] (Decl: q), Read) +// CHECK: Expire ([[L_X]] (Path: x)) +}
\ No newline at end of file diff --git a/clang/test/Sema/warn-thread-safety-analysis.c b/clang/test/Sema/warn-thread-safety-analysis.c index b43f97a..549cb12 100644 --- a/clang/test/Sema/warn-thread-safety-analysis.c +++ b/clang/test/Sema/warn-thread-safety-analysis.c @@ -184,9 +184,13 @@ int main(void) { /// Cleanup functions { struct Mutex* const __attribute__((cleanup(unlock_scope))) scope = &mu1; - mutex_exclusive_lock(scope); // Note that we have to lock through scope, because no alias analysis! + mutex_exclusive_lock(scope); // Lock through scope works. // Cleanup happens automatically -> no warning. } + { + struct Mutex* const __attribute__((unused, cleanup(unlock_scope))) scope = &mu1; + mutex_exclusive_lock(&mu1); // With basic alias analysis lock through mu1 also works. + } foo_.a_value = 0; // expected-warning {{writing variable 'a_value' requires holding mutex 'mu_' exclusively}} *foo_.a_ptr = 1; // expected-warning {{writing the value pointed to by 'a_ptr' requires holding mutex 'bar.other_mu' exclusively}} diff --git a/clang/test/SemaCXX/builtin-get-vtable-pointer.cpp b/clang/test/SemaCXX/builtin-get-vtable-pointer.cpp index b04b38d..b99bbf0 100644 --- a/clang/test/SemaCXX/builtin-get-vtable-pointer.cpp +++ b/clang/test/SemaCXX/builtin-get-vtable-pointer.cpp @@ -66,9 +66,7 @@ struct PolymorphicTemplate { }; void test_function(int); // expected-note{{possible target for call}} - // expected-note@-1{{possible target for call}} void test_function(double); // expected-note{{possible target for call}} - // expected-note@-1{{possible target for call}} void getVTablePointer() { ForwardDeclaration *fd = nullptr; @@ -89,7 +87,6 @@ void getVTablePointer() { __builtin_get_vtable_pointer(np_array); // expected-error{{__builtin_get_vtable_pointer requires an argument of polymorphic class pointer type, but 'NonPolymorphic' has no virtual methods}} __builtin_get_vtable_pointer(&np_array); // expected-error{{__builtin_get_vtable_pointer requires an argument of class 
pointer type, but 'NonPolymorphic (*)[1]' was provided}} __builtin_get_vtable_pointer(test_function); // expected-error{{reference to overloaded function could not be resolved; did you mean to call it?}} - // expected-error@-1{{reference to overloaded function could not be resolved; did you mean to call it?}} Foo<double> Food; Foo<int> Fooi; __builtin_get_vtable_pointer(Food); // expected-error{{__builtin_get_vtable_pointer requires an argument of class pointer type, but 'Foo<double>' was provided}} diff --git a/clang/test/SemaCXX/builtin-overload-resolution.cpp b/clang/test/SemaCXX/builtin-overload-resolution.cpp new file mode 100644 index 0000000..81d3055 --- /dev/null +++ b/clang/test/SemaCXX/builtin-overload-resolution.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -std=c++20 %s -emit-obj -o /dev/null + +const int* test_odr_used() { + // This previously crashed due to Value improperly being removed from + // MaybeODRUseExprs. + static constexpr int Value = 0; + return __builtin_addressof(Value); +} diff --git a/clang/test/SemaCXX/labeled-break-continue-constexpr.cpp b/clang/test/SemaCXX/labeled-break-continue-constexpr.cpp index bec6c58..d1b57ad 100644 --- a/clang/test/SemaCXX/labeled-break-continue-constexpr.cpp +++ b/clang/test/SemaCXX/labeled-break-continue-constexpr.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fnamed-loops -std=c++23 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fnamed-loops -std=c++23 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter // expected-no-diagnostics struct Tracker { diff --git a/clang/test/SemaCXX/sugar-common-types.cpp b/clang/test/SemaCXX/sugar-common-types.cpp index dd5fc4a..4db0d2a 100644 --- a/clang/test/SemaCXX/sugar-common-types.cpp +++ b/clang/test/SemaCXX/sugar-common-types.cpp @@ -203,3 +203,27 @@ namespace member_pointers { N t3 = 0 ? 
&W1::a : &W2::b; // expected-error@-1 {{rvalue of type 'B1 member_pointers::W<void>::*'}} } // namespace member_pointers + +namespace FunctionTypeExtInfo { + namespace RecordType { + class A; + void (*x)(__attribute__((swift_async_context)) A *); + + class A; + void (*y)(__attribute__((swift_async_context)) A *); + + N t1 = 0 ? x : y; + // expected-error@-1 {{lvalue of type 'void (*)(__attribute__((swift_async_context)) A *)'}} + } // namespace RecordType + namespace TypedefType { + class A; + using B = A; + void (*x)(__attribute__((swift_async_context)) B *); + + using B = A; + void (*y)(__attribute__((swift_async_context)) B *); + + N t1 = 0 ? x : y; + // expected-error@-1 {{lvalue of type 'void (*)(__attribute__((swift_async_context)) B *)'}} + } // namespace TypedefType +} // namespace FunctionTypeExtInfo diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp index d82e248..ef662b7 100644 --- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp @@ -1556,9 +1556,9 @@ void main() { Child *c; Base *b = c; - b->func1(); // expected-warning {{calling function 'func1' requires holding mutex 'b->mu_' exclusively}} + b->func1(); // expected-warning {{calling function 'func1' requires holding mutex 'c->mu_' exclusively}} b->mu_.Lock(); - b->func2(); // expected-warning {{cannot call function 'func2' while mutex 'b->mu_' is held}} + b->func2(); // expected-warning {{cannot call function 'func2' while mutex 'c->mu_' is held}} b->mu_.Unlock(); c->func1(); // expected-warning {{calling function 'func1' requires holding mutex 'c->mu_' exclusively}} @@ -6875,6 +6875,34 @@ class PointerGuard { }; } // namespace Derived_Smart_Pointer +// Test for capabilities that are heap-allocated and stored in static variables. 
+namespace FunctionStaticVariable { +struct Data { + Mutex mu; + int x GUARDED_BY(mu); +}; + +void testStaticVariable() { +} + +void testHeapAllocation() { + static Data *d = new Data; + d->mu.Lock(); + d->x = 5; + d->mu.Unlock(); +} + +void testHeapAllocationBug() { + static auto *d = new Data; + d->x = 10; // expected-warning{{writing variable 'x' requires holding mutex 'd->mu' exclusively}} +} + +void testHeapAllocationScopedLock() { + static Mutex *mu = new Mutex; + MutexLock lock(mu); +} +} // namespace FunctionStaticVariable + namespace Reentrancy { class LOCKABLE REENTRANT_CAPABILITY ReentrantMutex { @@ -7238,3 +7266,341 @@ public: }; } // namespace Reentrancy + +// Tests for tracking aliases of capabilities. +namespace CapabilityAliases { +struct Foo { + Mutex mu; + int data GUARDED_BY(mu); +}; + +Foo *returnsFoo(); +Foo *returnsFoo(Foo *foo); +void locksRequired(Foo *foo) EXCLUSIVE_LOCKS_REQUIRED(foo->mu); +void escapeAlias(int a, Foo *&ptr); +void escapeAlias(int b, Foo **ptr); +void passByConstRef(Foo* const& ptr); + +void testBasicPointerAlias(Foo *f) { + Foo *ptr = f; + ptr->mu.Lock(); // lock through alias + f->data = 42; // access through original + ptr->mu.Unlock(); // unlock through alias +} + +void testBasicPointerAliasNoInit(Foo *f) { + Foo *ptr; + + ptr = nullptr; + ptr = f; + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); + ptr = nullptr; +} + +void testBasicPointerAliasLoop() { + for (;;) { + Foo *f = returnsFoo(); + Foo *ptr = f; + if (!ptr) + break; + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); + } +} + +void testPointerAliasNoEscape1(Foo *f) { + Foo *ptr = f; + testBasicPointerAlias(ptr); // pass alias by value + + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); +} + +void testPointerAliasNoEscape2(Foo *f) { + Foo *ptr = f; + passByConstRef(ptr); // pass alias by const ref + + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); +} + +void testPointerAliasNoEscape3() { + Foo *ptr = returnsFoo(); + ptr->mu.Lock(); + 
locksRequired(ptr); + ptr->mu.Unlock(); +} + +void testPointerAliasEscape1(Foo *f) { + Foo *ptr = f; + escapeAlias(0, ptr); + + ptr->mu.Lock(); + f->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + ptr->mu.Unlock(); +} + +void testPointerAliasEscape2(Foo *f) { + Foo *ptr = f; + escapeAlias(0, &ptr); + + ptr->mu.Lock(); + f->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + ptr->mu.Unlock(); +} + +void testPointerAliasEscape3(Foo *f) { + Foo *ptr; + + ptr = f; + escapeAlias(0, &ptr); + + ptr->mu.Lock(); + f->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + ptr->mu.Unlock(); +} + +void testPointerAliasEscapeAndReset(Foo *f) { + Foo *ptr; + + ptr = f; + escapeAlias(0, &ptr); + ptr = f; + + ptr->mu.Lock(); + f->data = 42; + ptr->mu.Unlock(); +} + +void testPointerAliasTryLock1() { + Foo *ptr = returnsFoo(); + if (ptr->mu.TryLock()) { + locksRequired(ptr); + ptr->mu.Unlock(); + } +} + +void testPointerAliasTryLock2() { + Foo *ptr; + ptr = returnsFoo(); + Foo *ptr2 = ptr; + if (ptr->mu.TryLock()) { + locksRequired(ptr); + ptr2->mu.Unlock(); + } +} + +// FIXME: This test demonstrates a dubious pattern that the analysis correctly +// flags as unsafe, though the user might perceive it as a false positive. The +// pattern combines a TryLock() failure path with a conditional reassignment of +// the pointer being locked: +// +// 1. The conditional reassignment `ptr = returnsFoo(ptr);` forces `ptr` to +// become a phi node in the CFG at the subsequent merge point. The alias +// analysis correctly tracks that `ptr` could refer to one of two distinct +// objects. +// +// 2. 
The lock acquired on the `!TryLock()` path should be (conceptually) +// `phi(P1, P2)->mu`, while the lock on the successful path is on the +// original `P1->mu`. When the paths merge the analysis currently discards +// the alias as it cannot prove a single alias on that path. +// +// While this pattern is stylistically fragile and difficult to reason about, a +// robust solution would require a more advanced symbolic representation of +// capabilities within the analyzer. For now, we warn on such ambiguity. +void testPointerAliasTryLockDubious(int x) { + Foo *ptr = returnsFoo(); + if (!ptr->mu.TryLock()) { // expected-note{{mutex acquired here}} + if (x) + ptr = returnsFoo(ptr); // <-- this breaks the pattern + ptr->mu.Lock(); // expected-note{{mutex acquired here}} + } + ptr->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'ptr->mu' exclusively}} \ + // expected-warning{{mutex 'ptr->mu' is not held on every path through here}} \ + // expected-warning{{mutex 'returnsFoo().mu' is not held on every path through here}} + ptr->mu.Unlock(); // expected-warning{{releasing mutex 'ptr->mu' that was not held}} +} + +void testReassignment() { + Foo f1, f2; + Foo *ptr = &f1; + ptr->mu.Lock(); + f1.data = 42; + ptr->mu.Unlock(); + + ptr = &f2; + ptr->mu.Lock(); + f2.data = 42; + f1.data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f1.mu'}} \ + // expected-note{{found near match 'f2.mu'}} + ptr->mu.Unlock(); +} + +// Nested field access through pointer +struct Container { + Foo foo; +}; + +void testNestedAccess(Container *c) { + Foo *ptr = &c->foo; // pointer to nested field + ptr->mu.Lock(); + c->foo.data = 42; + ptr->mu.Unlock(); +} + +void testNestedAcquire(Container *c) EXCLUSIVE_LOCK_FUNCTION(&c->foo.mu) { + Foo *buf = &c->foo; + buf->mu.Lock(); +} + +struct ContainerOfPtr { + Foo *foo_ptr; +}; + +void testIndirectAccess(ContainerOfPtr *fc) { + Foo *ptr = fc->foo_ptr; // get pointer + ptr->mu.Lock(); + 
fc->foo_ptr->data = 42; // access via original + ptr->mu.Unlock(); +} + +void testControlFlowDoWhile(Foo *f, int x) { + Foo *ptr = f; + + f->mu.Lock(); + if (x) { + // complex merge + do { } while (x--); + } + ptr->data = 42; + ptr->mu.Unlock(); +} + +// FIXME: No alias tracking through complex control flow. +void testComplexControlFlow(Foo *f1, Foo *f2, bool cond) { + Foo *ptr; + if (cond) { + ptr = f1; + } else { + ptr = f2; + } + ptr->mu.Lock(); + if (cond) { + f1->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f1->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + } else { + f2->data = 42; // expected-warning{{writing variable 'data' requires holding mutex 'f2->mu' exclusively}} \ + // expected-note{{found near match 'ptr->mu'}} + } + ptr->mu.Unlock(); +} + +void testLockFunction(Foo *f) EXCLUSIVE_LOCK_FUNCTION(&f->mu) { + Mutex *mu = &f->mu; + mu->Lock(); +} + +void testUnlockFunction(Foo *f) UNLOCK_FUNCTION(&f->mu) { + Mutex *mu = &f->mu; + mu->Unlock(); +} + +// This is an idiom to deal with "pointer to returned object has a lock held, +// but you must unlock it later" where the statement expression would be hidden +// behind a macro. +void lockWithinStatementExpr() { + Foo *f = ({ auto x = returnsFoo(); x->mu.Lock(); x; }); + f->data = 42; + f->mu.Unlock(); +} + +// Semantically UB, but let's not crash the compiler with this (should be +// handled by -Wuninitialized). +void testSelfInit() { + Mutex *mu = mu; // don't do this at home + mu->Lock(); + mu->Unlock(); +} + +void testSelfAssign() { + Foo *f = returnsFoo(); + f = f; + f->mu.Lock(); + f->data = 42; + f->mu.Unlock(); +} + +void testRecursiveAssign() { + Foo *f = returnsFoo(); + f = returnsFoo(f); + f->mu.Lock(); + f->data = 42; + f->mu.Unlock(); +} + +void testNew(Mutex *&out, int &x) { + Mutex *mu = new Mutex; + __atomic_store_n(&out, mu, __ATOMIC_RELEASE); + mu->Lock(); + x = 42; // ... 
perhaps guarded by mu + mu->Unlock(); +} + +void testNestedLoopInvariant(Container *c, int n) { + Foo *ptr = &c->foo; + ptr->mu.Lock(); + + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + } + } + + c->foo.data = 42; // ok: alias still valid + ptr->mu.Unlock(); +} + +void testLoopWithBreak(Foo *f, bool cond) { + Foo *ptr = f; + ptr->mu.Lock(); + for (int i = 0; i < 10; ++i) { + if (cond) { + break; // merge point is after the loop + } + } + f->data = 42; // ok + ptr->mu.Unlock(); +} + +void testLoopWithContinue(Foo *f, bool cond) { + Foo *ptr = f; + ptr->mu.Lock(); + for (int i = 0; i < 10; ++i) { + if (cond) { + continue; // tests merge at the top of loop. + } + } + f->data = 42; // ok + ptr->mu.Unlock(); +} + +void testLoopConditionalReassignment(Foo *f1, Foo *f2, bool cond) { + Foo *ptr = f1; + ptr->mu.Lock(); // expected-note{{mutex acquired here}} + + for (int i = 0; i < 10; ++i) { + if (cond) { + ptr = f2; // alias is reassigned on some path inside the loop. + } + } + f1->data = 42; + ptr->mu.Unlock(); // expected-warning{{releasing mutex 'ptr->mu' that was not held}} +} // expected-warning{{mutex 'f1->mu' is still held at the end of function}} +} // namespace CapabilityAliases diff --git a/clang/test/SemaHLSL/RootSignature-err.hlsl b/clang/test/SemaHLSL/RootSignature-err.hlsl index ccfa093..89c684c 100644 --- a/clang/test/SemaHLSL/RootSignature-err.hlsl +++ b/clang/test/SemaHLSL/RootSignature-err.hlsl @@ -179,7 +179,7 @@ void basic_validation_3() {} // expected-error@+2 {{value must be in the range [1, 4294967294]}} // expected-error@+1 {{value must be in the range [1, 4294967294]}} -[RootSignature("DescriptorTable(UAV(u0, numDescriptors = 0), Sampler(s0, numDescriptors = 0))")] +[RootSignature("DescriptorTable(UAV(u0, numDescriptors = 0)), DescriptorTable(Sampler(s0, numDescriptors = 0))")] void basic_validation_4() {} // expected-error@+2 {{value must be in the range [0, 16]}} @@ -189,4 +189,8 @@ void basic_validation_5() {} // 
expected-error@+1 {{value must be in the range [-16.00, 15.99]}} [RootSignature("StaticSampler(s0, mipLODBias = 15.990001)")] -void basic_validation_6() {}
\ No newline at end of file +void basic_validation_6() {} + +// expected-error@+1 {{sampler and non-sampler resource mixed in descriptor table}} +[RootSignature("DescriptorTable(Sampler(s0), CBV(b0))")] +void mixed_resource_table() {} diff --git a/clang/test/SemaHLSL/RootSignature-resource-ranges-err.hlsl b/clang/test/SemaHLSL/RootSignature-resource-ranges-err.hlsl index fd098b0..2d025d0 100644 --- a/clang/test/SemaHLSL/RootSignature-resource-ranges-err.hlsl +++ b/clang/test/SemaHLSL/RootSignature-resource-ranges-err.hlsl @@ -117,3 +117,28 @@ void bad_root_signature_14() {} // expected-note@+1 {{overlapping resource range here}} [RootSignature(DuplicatesRootSignature)] void valid_root_signature_15() {} + +#define AppendingToUnbound \ + "DescriptorTable(CBV(b1, numDescriptors = unbounded), CBV(b0))" + +// expected-error@+1 {{offset appends to unbounded descriptor range}} +[RootSignature(AppendingToUnbound)] +void append_to_unbound_signature() {} + +#define DirectOffsetOverflow \ + "DescriptorTable(CBV(b0, offset = 4294967294 , numDescriptors = 6))" + +// expected-error@+1 {{descriptor range offset overflows [4294967294, 4294967299]}} +[RootSignature(DirectOffsetOverflow)] +void direct_offset_overflow_signature() {} + +#define AppendOffsetOverflow \ + "DescriptorTable(CBV(b0, offset = 4294967292), CBV(b1, numDescriptors = 7))" + +// expected-error@+1 {{descriptor range offset overflows [4294967293, 4294967299]}} +[RootSignature(AppendOffsetOverflow)] +void append_offset_overflow_signature() {} + +// expected-error@+1 {{descriptor range offset overflows [4294967292, 4294967296]}} +[RootSignature("DescriptorTable(CBV(b0, offset = 4294967292, numDescriptors = 5))")] +void offset_() {} diff --git a/clang/test/SemaHLSL/RootSignature-resource-ranges.hlsl b/clang/test/SemaHLSL/RootSignature-resource-ranges.hlsl index 09a1110..10e7215 100644 --- a/clang/test/SemaHLSL/RootSignature-resource-ranges.hlsl +++ b/clang/test/SemaHLSL/RootSignature-resource-ranges.hlsl @@ -22,3 
+22,6 @@ void valid_root_signature_5() {} [RootSignature("DescriptorTable(SRV(t5), UAV(u5, numDescriptors=2))")] void valid_root_signature_6() {} + +[RootSignature("DescriptorTable(CBV(b0, offset = 4294967292), CBV(b1, numDescriptors = 3))")] +void valid_root_signature_7() {} diff --git a/clang/test/SemaHLSL/RootSignature-target-err.hlsl b/clang/test/SemaHLSL/RootSignature-target-err.hlsl new file mode 100644 index 0000000..49aca9e --- /dev/null +++ b/clang/test/SemaHLSL/RootSignature-target-err.hlsl @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-rootsignature -hlsl-entry NotFoundRS -fsyntax-only %s -verify + +// expected-error@* {{rootsignature specified as target environment but entry, NotFoundRS, was not defined}} + +#define EntryRootSig "CBV(b0)" diff --git a/clang/test/SemaHLSL/Semantics/invalid_entry_parameter.hlsl b/clang/test/SemaHLSL/Semantics/invalid_entry_parameter.hlsl index 1bb4ee5..070075d 100644 --- a/clang/test/SemaHLSL/Semantics/invalid_entry_parameter.hlsl +++ b/clang/test/SemaHLSL/Semantics/invalid_entry_parameter.hlsl @@ -17,13 +17,13 @@ void CSMain2(ST ID : SV_DispatchThreadID) { } void foo() { -// expected-warning@+1 {{'SV_DispatchThreadID' attribute only applies to parameters and non-static data members}} +// expected-warning@+1 {{'SV_DispatchThreadID' attribute only applies to parameters, non-static data members, and functions}} uint V : SV_DispatchThreadID; } struct ST2 { -// expected-warning@+1 {{'SV_DispatchThreadID' attribute only applies to parameters and non-static data members}} +// expected-warning@+1 {{'SV_DispatchThreadID' attribute only applies to parameters, non-static data members, and functions}} static uint X : SV_DispatchThreadID; uint s : SV_DispatchThreadID; }; @@ -40,12 +40,12 @@ void CSMain2_GID(ST GID : SV_GroupID) { } void foo_GID() { -// expected-warning@+1 {{'SV_GroupID' attribute only applies to parameters and non-static data members}} +// expected-warning@+1 {{'SV_GroupID' attribute only applies 
to parameters, non-static data members, and functions}} uint GIS : SV_GroupID; } struct ST2_GID { -// expected-warning@+1 {{'SV_GroupID' attribute only applies to parameters and non-static data members}} +// expected-warning@+1 {{'SV_GroupID' attribute only applies to parameters, non-static data members, and functions}} static uint GID : SV_GroupID; uint s_gid : SV_GroupID; }; @@ -62,12 +62,12 @@ void CSMain2_GThreadID(ST GID : SV_GroupThreadID) { } void foo_GThreadID() { -// expected-warning@+1 {{'SV_GroupThreadID' attribute only applies to parameters and non-static data members}} +// expected-warning@+1 {{'SV_GroupThreadID' attribute only applies to parameters, non-static data members, and functions}} uint GThreadIS : SV_GroupThreadID; } struct ST2_GThreadID { -// expected-warning@+1 {{'SV_GroupThreadID' attribute only applies to parameters and non-static data members}} +// expected-warning@+1 {{'SV_GroupThreadID' attribute only applies to parameters, non-static data members, and functions}} static uint GThreadID : SV_GroupThreadID; uint s_gthreadid : SV_GroupThreadID; }; diff --git a/clang/test/SemaObjC/non-trivial-c-union.m b/clang/test/SemaObjC/non-trivial-c-union.m index 34f1caa..39fbe2d 100644 --- a/clang/test/SemaObjC/non-trivial-c-union.m +++ b/clang/test/SemaObjC/non-trivial-c-union.m @@ -87,3 +87,10 @@ void testVolatileLValueToRValue(volatile U0 *a) { void unionInSystemHeader0(U0_SystemHeader); void unionInSystemHeader1(U1_SystemHeader); // expected-error {{cannot use type 'U1_SystemHeader' for a function/method parameter since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U1_SystemHeader' for a function/method parameter since it is a union that is non-trivial to copy}} + +void testAddressof(void) { + extern volatile U0 t0; + // These don't dereference so they shouldn't cause an error. 
+ (void)&t0; + (void)__builtin_addressof(t0); +} diff --git a/clang/test/Tooling/auto-detect-from-source-parent-of-cwd.cpp b/clang/test/Tooling/auto-detect-from-source-parent-of-cwd.cpp index e24bea1..cc017a0 100644 --- a/clang/test/Tooling/auto-detect-from-source-parent-of-cwd.cpp +++ b/clang/test/Tooling/auto-detect-from-source-parent-of-cwd.cpp @@ -1,5 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // RUN: rm -rf %t // RUN: mkdir -p %t/abc/def/ijk/qwe diff --git a/clang/test/Tooling/clang-check-pwd.cpp b/clang/test/Tooling/clang-check-pwd.cpp index 76547b3..309cee5 100644 --- a/clang/test/Tooling/clang-check-pwd.cpp +++ b/clang/test/Tooling/clang-check-pwd.cpp @@ -1,5 +1,4 @@ -// Needs symlinks -// UNSUPPORTED: system-windows +// REQUIRES: symlinks // RUN: rm -rf %t // RUN: mkdir %t diff --git a/clang/unittests/AST/DeclTest.cpp b/clang/unittests/AST/DeclTest.cpp index e76edbf..b95d361 100644 --- a/clang/unittests/AST/DeclTest.cpp +++ b/clang/unittests/AST/DeclTest.cpp @@ -12,10 +12,12 @@ #include "clang/AST/Decl.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Mangle.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Basic/ABI.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/TargetInfo.h" @@ -102,6 +104,124 @@ TEST(Decl, AsmLabelAttr) { "foo"); } +TEST(Decl, AsmLabelAttr_LLDB) { + StringRef Code = R"( + struct S { + void f() {} + S() = default; + ~S() = default; + }; + )"; + auto AST = + tooling::buildASTFromCodeWithArgs(Code, {"-target", "i386-apple-darwin"}); + ASTContext &Ctx = AST->getASTContext(); + assert(Ctx.getTargetInfo().getUserLabelPrefix() == StringRef("_") && + "Expected target to have a global prefix"); + DiagnosticsEngine &Diags = AST->getDiagnostics(); + + const auto *DeclS = + selectFirst<CXXRecordDecl>("d", 
match(cxxRecordDecl().bind("d"), Ctx)); + + auto *DeclF = *DeclS->method_begin(); + auto *Ctor = *DeclS->ctor_begin(); + auto *Dtor = DeclS->getDestructor(); + + ASSERT_TRUE(DeclF); + ASSERT_TRUE(Ctor); + ASSERT_TRUE(Dtor); + + DeclF->addAttr(AsmLabelAttr::Create(Ctx, "$__lldb_func::123:123:_Z1fv")); + Ctor->addAttr(AsmLabelAttr::Create(Ctx, "$__lldb_func::123:123:S")); + Dtor->addAttr(AsmLabelAttr::Create(Ctx, "$__lldb_func::123:123:~S")); + + std::unique_ptr<ItaniumMangleContext> MC( + ItaniumMangleContext::create(Ctx, Diags)); + + { + std::string Mangled; + llvm::raw_string_ostream OS_Mangled(Mangled); + MC->mangleName(DeclF, OS_Mangled); + + ASSERT_EQ(Mangled, "\x01$__lldb_func::123:123:_Z1fv"); + }; + + { + std::string Mangled; + llvm::raw_string_ostream OS_Mangled(Mangled); + MC->mangleName(GlobalDecl(Ctor, CXXCtorType::Ctor_Complete), OS_Mangled); + + ASSERT_EQ(Mangled, "\x01$__lldb_func:C0:123:123:S"); + }; + + { + std::string Mangled; + llvm::raw_string_ostream OS_Mangled(Mangled); + MC->mangleName(GlobalDecl(Ctor, CXXCtorType::Ctor_Base), OS_Mangled); + + ASSERT_EQ(Mangled, "\x01$__lldb_func:C1:123:123:S"); + }; + + { + std::string Mangled; + llvm::raw_string_ostream OS_Mangled(Mangled); + MC->mangleName(GlobalDecl(Dtor, CXXDtorType::Dtor_Deleting), OS_Mangled); + + ASSERT_EQ(Mangled, "\x01$__lldb_func:D0:123:123:~S"); + }; + + { + std::string Mangled; + llvm::raw_string_ostream OS_Mangled(Mangled); + MC->mangleName(GlobalDecl(Dtor, CXXDtorType::Dtor_Base), OS_Mangled); + + ASSERT_EQ(Mangled, "\x01$__lldb_func:D2:123:123:~S"); + }; +} + +TEST(Decl, AsmLabelAttr_LLDB_Inherit) { + StringRef Code = R"( + struct Base { + Base(int x) {} + }; + + struct Derived : Base { + using Base::Base; + } d(5); + )"; + auto AST = + tooling::buildASTFromCodeWithArgs(Code, {"-target", "i386-apple-darwin"}); + ASTContext &Ctx = AST->getASTContext(); + assert(Ctx.getTargetInfo().getUserLabelPrefix() == StringRef("_") && + "Expected target to have a global prefix"); + 
DiagnosticsEngine &Diags = AST->getDiagnostics(); + + const auto *Ctor = selectFirst<CXXConstructorDecl>( + "ctor", + match(cxxConstructorDecl(isInheritingConstructor()).bind("ctor"), Ctx)); + + const_cast<CXXConstructorDecl *>(Ctor)->addAttr( + AsmLabelAttr::Create(Ctx, "$__lldb_func::123:123:Derived")); + + std::unique_ptr<ItaniumMangleContext> MC( + ItaniumMangleContext::create(Ctx, Diags)); + + { + std::string Mangled; + llvm::raw_string_ostream OS_Mangled(Mangled); + MC->mangleName(GlobalDecl(Ctor, CXXCtorType::Ctor_Complete), OS_Mangled); + + ASSERT_EQ(Mangled, "\x01$__lldb_func:CI0:123:123:Derived"); + }; + + { + std::string Mangled; + llvm::raw_string_ostream OS_Mangled(Mangled); + MC->mangleName(GlobalDecl(Ctor, CXXCtorType::Ctor_Base), OS_Mangled); + + ASSERT_EQ(Mangled, "\x01$__lldb_func:CI1:123:123:Derived"); + }; +} + TEST(Decl, MangleDependentSizedArray) { StringRef Code = R"( template <int ...N> diff --git a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp index 88b9266..57162cd 100644 --- a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp @@ -8,8 +8,15 @@ #include "clang/Analysis/FlowSensitive/RecordOps.h" #include "TestingSupport.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/NoopLattice.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" +#include <string> namespace clang { namespace dataflow { @@ -190,7 +197,7 @@ TEST(RecordOpsTest, RecordsEqual) { }); } -TEST(TransferTest, CopyRecordBetweenDerivedAndBase) { +TEST(RecordOpsTest, CopyRecordBetweenDerivedAndBase) { std::string Code = R"( struct A { int i; @@ -266,6 +273,67 @@ TEST(TransferTest, 
CopyRecordBetweenDerivedAndBase) { }); } +TEST(RecordOpsTest, CopyRecordWithExplicitSharedBaseTypeToCopy) { + std::string Code = R"( + struct Base { + bool BaseField; + char UnmodeledField; + }; + + struct DerivedOne : public Base { + int DerivedOneField; + }; + + struct DerivedTwo : public Base { + int DerivedTwoField; + }; + + void target(Base B, DerivedOne D1, DerivedTwo D2) { + (void) B.BaseField; + // [[p]] + } + )"; + auto SyntheticFieldCallback = [](QualType Ty) -> llvm::StringMap<QualType> { + CXXRecordDecl *BaseDecl = nullptr; + std::string TypeAsString = Ty.getAsString(); + if (TypeAsString == "Base") + BaseDecl = Ty->getAsCXXRecordDecl(); + else if (TypeAsString == "DerivedOne" || TypeAsString == "DerivedTwo") + BaseDecl = Ty->getAsCXXRecordDecl() + ->bases_begin() + ->getType() + ->getAsCXXRecordDecl(); + else + return {}; + QualType FieldType = getFieldNamed(BaseDecl, "BaseField")->getType(); + return {{"synth_field", FieldType}}; + }; + // Test copying derived to base class. 
+ runDataflow( + Code, SyntheticFieldCallback, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + Environment Env = getEnvironmentAtAnnotation(Results, "p").fork(); + + const ValueDecl *BaseFieldDecl = findValueDecl(ASTCtx, "BaseField"); + auto &B = getLocForDecl<RecordStorageLocation>(ASTCtx, Env, "B"); + auto &D1 = getLocForDecl<RecordStorageLocation>(ASTCtx, Env, "D1"); + auto &D2 = getLocForDecl<RecordStorageLocation>(ASTCtx, Env, "D2"); + + EXPECT_NE(Env.getValue(*D1.getChild(*BaseFieldDecl)), + Env.getValue(*D2.getChild(*BaseFieldDecl))); + EXPECT_NE(Env.getValue(D1.getSyntheticField("synth_field")), + Env.getValue(D2.getSyntheticField("synth_field"))); + + copyRecord(D1, D2, Env, B.getType()); + + EXPECT_EQ(Env.getValue(*D1.getChild(*BaseFieldDecl)), + Env.getValue(*D2.getChild(*BaseFieldDecl))); + EXPECT_EQ(Env.getValue(D1.getSyntheticField("synth_field")), + Env.getValue(D2.getSyntheticField("synth_field"))); + }); +} + } // namespace } // namespace test } // namespace dataflow diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 214aaee..d97e2b0 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -9,17 +9,25 @@ #include "TestingSupport.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/NoopAnalysis.h" +#include "clang/Analysis/FlowSensitive/NoopLattice.h" #include "clang/Analysis/FlowSensitive/RecordOps.h" #include 
"clang/Analysis/FlowSensitive/StorageLocation.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "clang/Basic/LangStandard.h" #include "clang/Testing/TestAST.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -27,6 +35,7 @@ #include <string> #include <string_view> #include <utility> +#include <vector> namespace clang { namespace dataflow { @@ -1527,6 +1536,40 @@ TEST(TransferTest, BaseClassInitializer) { llvm::Succeeded()); } +TEST(TransferTest, BaseClassInitializerFromSiblingDerivedInstance) { + using ast_matchers::cxxConstructorDecl; + using ast_matchers::hasName; + using ast_matchers::ofClass; + + std::string Code = R"( + struct Base { + bool BaseField; + char UnmodeledField; + }; + + struct DerivedOne : public Base { + int DerivedOneField; + }; + + struct DerivedTwo : public Base { + int DerivedTwoField; + + DerivedTwo(const DerivedOne& d1) + : Base(d1), DerivedTwoField(d1.DerivedOneField) { + (void)BaseField; + } + }; + )"; + runDataflow( + Code, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + // Regression test only; we used to crash when transferring the base + // class initializer from the DerivedToBase-cast `d1`. 
+ }, + LangStandard::lang_cxx17, /*ApplyBuiltinTransfer=*/true, "DerivedTwo"); +} + TEST(TransferTest, FieldsDontHaveValuesInConstructor) { // In a constructor, unlike in regular member functions, we don't want fields // to be pre-initialized with values, because doing so is the job of the @@ -3541,7 +3584,7 @@ TEST(TransferTest, ResultObjectLocationDontVisitUnevaluatedContexts) { testFunction(Code, "noexceptTarget"); } -TEST(TransferTest, StaticCast) { +TEST(TransferTest, StaticCastNoOp) { std::string Code = R"( void target(int Foo) { int Bar = static_cast<int>(Foo); @@ -3561,6 +3604,13 @@ TEST(TransferTest, StaticCast) { const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar"); ASSERT_THAT(BarDecl, NotNull()); + const auto *Cast = ast_matchers::selectFirst<CXXStaticCastExpr>( + "cast", + ast_matchers::match(ast_matchers::cxxStaticCastExpr().bind("cast"), + ASTCtx)); + ASSERT_THAT(Cast, NotNull()); + ASSERT_EQ(Cast->getCastKind(), CK_NoOp); + const auto *FooVal = Env.getValue(*FooDecl); const auto *BarVal = Env.getValue(*BarDecl); EXPECT_TRUE(isa<IntegerValue>(FooVal)); @@ -3569,6 +3619,268 @@ TEST(TransferTest, StaticCast) { }); } +TEST(TransferTest, StaticCastBaseToDerived) { + std::string Code = R"cc( + struct Base { + char C; + }; + struct Intermediate : public Base { + bool B; + }; + struct Derived : public Intermediate { + int I; + }; + Base& getBaseRef(); + void target(Base* BPtr) { + Derived* DPtr = static_cast<Derived*>(BPtr); + DPtr->C; + DPtr->B; + DPtr->I; + Derived& DRef = static_cast<Derived&>(*BPtr); + DRef.C; + DRef.B; + DRef.I; + Derived& DRefFromFunc = static_cast<Derived&>(getBaseRef()); + DRefFromFunc.C; + DRefFromFunc.B; + DRefFromFunc.I; + // [[p]] + } + )cc"; + runDataflow( + Code, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + const ValueDecl *BPtrDecl = 
findValueDecl(ASTCtx, "BPtr"); + ASSERT_THAT(BPtrDecl, NotNull()); + + const ValueDecl *DPtrDecl = findValueDecl(ASTCtx, "DPtr"); + ASSERT_THAT(DPtrDecl, NotNull()); + + const ValueDecl *DRefDecl = findValueDecl(ASTCtx, "DRef"); + ASSERT_THAT(DRefDecl, NotNull()); + + const ValueDecl *DRefFromFuncDecl = + findValueDecl(ASTCtx, "DRefFromFunc"); + ASSERT_THAT(DRefFromFuncDecl, NotNull()); + + const auto *Cast = ast_matchers::selectFirst<CXXStaticCastExpr>( + "cast", + ast_matchers::match(ast_matchers::cxxStaticCastExpr().bind("cast"), + ASTCtx)); + ASSERT_THAT(Cast, NotNull()); + ASSERT_EQ(Cast->getCastKind(), CK_BaseToDerived); + + EXPECT_EQ(Env.getValue(*BPtrDecl), Env.getValue(*DPtrDecl)); + EXPECT_EQ(&Env.get<PointerValue>(*BPtrDecl)->getPointeeLoc(), + Env.getStorageLocation(*DRefDecl)); + // For DRefFromFunc, not crashing when analyzing the field accesses is + // enough. + }); +} + +TEST(TransferTest, ExplicitDerivedToBaseCast) { + std::string Code = R"cc( + struct Base {}; + struct Derived : public Base {}; + void target(Derived D) { + (Base*)&D; + // [[p]] + } +)cc"; + runDataflow( + Code, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *Cast = ast_matchers::selectFirst<ImplicitCastExpr>( + "cast", ast_matchers::match( + ast_matchers::implicitCastExpr().bind("cast"), ASTCtx)); + ASSERT_THAT(Cast, NotNull()); + ASSERT_EQ(Cast->getCastKind(), CK_DerivedToBase); + + auto *AddressOf = ast_matchers::selectFirst<UnaryOperator>( + "addressof", + ast_matchers::match(ast_matchers::unaryOperator().bind("addressof"), + ASTCtx)); + ASSERT_THAT(AddressOf, NotNull()); + ASSERT_EQ(AddressOf->getOpcode(), UO_AddrOf); + + EXPECT_EQ(Env.getValue(*Cast), Env.getValue(*AddressOf)); + }); +} + +TEST(TransferTest, ConstructorConversion) { + std::string Code = R"cc( + struct Base {}; + struct Derived 
: public Base {}; + void target(Derived D) { + Base B = (Base)D; + // [[p]] + } +)cc"; + runDataflow( + Code, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *Cast = ast_matchers::selectFirst<CStyleCastExpr>( + "cast", ast_matchers::match( + ast_matchers::cStyleCastExpr().bind("cast"), ASTCtx)); + ASSERT_THAT(Cast, NotNull()); + ASSERT_EQ(Cast->getCastKind(), CK_ConstructorConversion); + + auto &DLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "D"); + auto &BLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "B"); + EXPECT_NE(&BLoc, &DLoc); + }); +} + +TEST(TransferTest, UserDefinedConversion) { + std::string Code = R"cc( + struct To {}; + struct From { + operator To(); + }; + void target(From F) { + To T = (To)F; + // [[p]] + } +)cc"; + runDataflow( + Code, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *Cast = ast_matchers::selectFirst<ImplicitCastExpr>( + "cast", ast_matchers::match( + ast_matchers::implicitCastExpr().bind("cast"), ASTCtx)); + ASSERT_THAT(Cast, NotNull()); + ASSERT_EQ(Cast->getCastKind(), CK_UserDefinedConversion); + + auto &FLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "F"); + auto &TLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "T"); + EXPECT_NE(&TLoc, &FLoc); + }); +} + +TEST(TransferTest, ImplicitUncheckedDerivedToBaseCast) { + std::string Code = R"cc( + struct Base { + void method(); + }; + struct Derived : public Base {}; + void target(Derived D) { + D.method(); + // [[p]] + } +)cc"; + runDataflow( + Code, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const 
Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *Cast = ast_matchers::selectFirst<ImplicitCastExpr>( + "cast", ast_matchers::match( + ast_matchers::implicitCastExpr().bind("cast"), ASTCtx)); + ASSERT_THAT(Cast, NotNull()); + ASSERT_EQ(Cast->getCastKind(), CK_UncheckedDerivedToBase); + + auto &DLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "D"); + EXPECT_EQ(Env.getStorageLocation(*Cast), &DLoc); + }); +} + +TEST(TransferTest, ImplicitDerivedToBaseCast) { + std::string Code = R"cc( + struct Base {}; + struct Derived : public Base {}; + void target() { + Base* B = new Derived(); + // [[p]] + } +)cc"; + runDataflow( + Code, + [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *Cast = ast_matchers::selectFirst<ImplicitCastExpr>( + "cast", ast_matchers::match( + ast_matchers::implicitCastExpr().bind("cast"), ASTCtx)); + ASSERT_THAT(Cast, NotNull()); + ASSERT_EQ(Cast->getCastKind(), CK_DerivedToBase); + + auto *New = ast_matchers::selectFirst<CXXNewExpr>( + "new", ast_matchers::match(ast_matchers::cxxNewExpr().bind("new"), + ASTCtx)); + ASSERT_THAT(New, NotNull()); + + EXPECT_EQ(Env.getValue(*Cast), Env.getValue(*New)); + }); +} + +TEST(TransferTest, ReinterpretCast) { + std::string Code = R"cc( + struct S { + int I; + }; + + void target(unsigned char* Bytes) { + S& SRef = reinterpret_cast<S&>(Bytes); + SRef.I; + S* SPtr = reinterpret_cast<S*>(Bytes); + SPtr->I; + // [[p]] + } + )cc"; + runDataflow(Code, [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> + &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + const ValueDecl *I = findValueDecl(ASTCtx, "I"); + ASSERT_THAT(I, NotNull()); + + // No particular knowledge of I's value is modeled, but for both 
casts, + // the fields of S are modeled. + + { + auto &Loc = getLocForDecl<RecordStorageLocation>(ASTCtx, Env, "SRef"); + std::vector<const ValueDecl *> Children; + for (const auto &Entry : Loc.children()) { + Children.push_back(Entry.getFirst()); + } + + EXPECT_THAT(Children, UnorderedElementsAre(I)); + } + + { + auto &Loc = cast<RecordStorageLocation>( + getValueForDecl<PointerValue>(ASTCtx, Env, "SPtr").getPointeeLoc()); + std::vector<const ValueDecl *> Children; + for (const auto &Entry : Loc.children()) { + Children.push_back(Entry.getFirst()); + } + + EXPECT_THAT(Children, UnorderedElementsAre(I)); + } + }); +} + TEST(TransferTest, IntegralCast) { std::string Code = R"( void target(int Foo) { diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index 44c0978..9b9f5dd 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -501,8 +501,6 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseRootConsantsTest) { TEST_F(ParseHLSLRootSignatureTest, ValidParseRootFlagsTest) { using llvm::dxbc::RootFlags; const llvm::StringLiteral Source = R"cc( - RootFlags(), - RootFlags(0), RootFlags( deny_domain_shader_root_access | deny_pixel_shader_root_access | @@ -533,18 +531,10 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseRootFlagsTest) { ASSERT_FALSE(Parser.parse()); auto Elements = Parser.getElements(); - ASSERT_EQ(Elements.size(), 3u); + ASSERT_EQ(Elements.size(), 1u); RootElement Elem = Elements[0].getElement(); ASSERT_TRUE(std::holds_alternative<RootFlags>(Elem)); - ASSERT_EQ(std::get<RootFlags>(Elem), RootFlags::None); - - Elem = Elements[1].getElement(); - ASSERT_TRUE(std::holds_alternative<RootFlags>(Elem)); - ASSERT_EQ(std::get<RootFlags>(Elem), RootFlags::None); - - Elem = Elements[2].getElement(); - ASSERT_TRUE(std::holds_alternative<RootFlags>(Elem)); auto ValidRootFlags = RootFlags::AllowInputAssemblerInputLayout | 
RootFlags::DenyVertexShaderRootAccess | RootFlags::DenyHullShaderRootAccess | @@ -562,6 +552,64 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseRootFlagsTest) { ASSERT_TRUE(Consumer->isSatisfied()); } +TEST_F(ParseHLSLRootSignatureTest, ValidParseEmptyRootFlagsTest) { + using llvm::dxbc::RootFlags; + const llvm::StringLiteral Source = R"cc( + RootFlags(), + )cc"; + + auto Ctx = createMinimalASTContext(); + StringLiteral *Signature = wrapSource(Ctx, Source); + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, ModLoader); + + hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP); + + // Test no diagnostics produced + Consumer->setNoDiag(); + + ASSERT_FALSE(Parser.parse()); + + auto Elements = Parser.getElements(); + ASSERT_EQ(Elements.size(), 1u); + + RootElement Elem = Elements[0].getElement(); + ASSERT_TRUE(std::holds_alternative<RootFlags>(Elem)); + ASSERT_EQ(std::get<RootFlags>(Elem), RootFlags::None); + + ASSERT_TRUE(Consumer->isSatisfied()); +} + +TEST_F(ParseHLSLRootSignatureTest, ValidParseZeroRootFlagsTest) { + using llvm::dxbc::RootFlags; + const llvm::StringLiteral Source = R"cc( + RootFlags(0), + )cc"; + + auto Ctx = createMinimalASTContext(); + StringLiteral *Signature = wrapSource(Ctx, Source); + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, ModLoader); + + hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP); + + // Test no diagnostics produced + Consumer->setNoDiag(); + + ASSERT_FALSE(Parser.parse()); + + auto Elements = Parser.getElements(); + ASSERT_EQ(Elements.size(), 1u); + + RootElement Elem = Elements[0].getElement(); + ASSERT_TRUE(std::holds_alternative<RootFlags>(Elem)); + ASSERT_EQ(std::get<RootFlags>(Elem), RootFlags::None); + + ASSERT_TRUE(Consumer->isSatisfied()); +} + TEST_F(ParseHLSLRootSignatureTest, ValidParseRootDescriptorsTest) { using llvm::dxbc::RootDescriptorFlags; const llvm::StringLiteral Source = R"cc( @@ -1658,4 +1706,27 @@ 
TEST_F(ParseHLSLRootSignatureTest, InvalidDescriptorRangeFlagsValueTest) { ASSERT_TRUE(Consumer->isSatisfied()); } +TEST_F(ParseHLSLRootSignatureTest, InvalidMultipleRootFlagsTest) { + // This test will check that an error is produced when there are multiple + // root flags provided + const llvm::StringLiteral Source = R"cc( + RootFlags(DENY_VERTEX_SHADER_ROOT_ACCESS), + RootFlags(DENY_PIXEL_SHADER_ROOT_ACCESS) + )cc"; + + auto Ctx = createMinimalASTContext(); + StringLiteral *Signature = wrapSource(Ctx, Source); + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, ModLoader); + + hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP); + + // Test correct diagnostic produced + Consumer->setExpected(diag::err_hlsl_rootsig_repeat_param); + ASSERT_TRUE(Parser.parse()); + + ASSERT_TRUE(Consumer->isSatisfied()); +} + } // anonymous namespace diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index d63e79a..a4e4de3 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -2725,12 +2725,15 @@ static void emitAttributes(const RecordKeeper &Records, raw_ostream &OS, assert(!Supers.empty() && "Forgot to specify a superclass for the attr"); std::string SuperName; bool Inheritable = false; + bool HLSLSemantic = false; for (const Record *R : reverse(Supers)) { if (R->getName() != "TargetSpecificAttr" && R->getName() != "DeclOrTypeAttr" && SuperName.empty()) SuperName = R->getName().str(); if (R->getName() == "InheritableAttr") Inheritable = true; + if (R->getName() == "HLSLSemanticAttr") + HLSLSemantic = true; } if (Header) @@ -3054,6 +3057,8 @@ static void emitAttributes(const RecordKeeper &Records, raw_ostream &OS, << (R.getValueAsBit("InheritEvenIfAlreadyPresent") ? "true" : "false"); } + if (HLSLSemantic) + OS << ", " << (R.getValueAsBit("SemanticIndexable") ? 
"true" : "false"); OS << ")\n"; for (auto const &ai : Args) { @@ -3270,7 +3275,8 @@ static const AttrClassDescriptor AttrClassDescriptors[] = { {"INHERITABLE_PARAM_ATTR", "InheritableParamAttr"}, {"INHERITABLE_PARAM_OR_STMT_ATTR", "InheritableParamOrStmtAttr"}, {"PARAMETER_ABI_ATTR", "ParameterABIAttr"}, - {"HLSL_ANNOTATION_ATTR", "HLSLAnnotationAttr"}}; + {"HLSL_ANNOTATION_ATTR", "HLSLAnnotationAttr"}, + {"HLSL_SEMANTIC_ATTR", "HLSLSemanticAttr"}}; static void emitDefaultDefine(raw_ostream &OS, StringRef name, const char *superName) { @@ -5209,6 +5215,14 @@ public: Other.Spellings[Kind].end()); } } + + bool hasSpelling() const { + for (size_t Kind = 0; Kind < NumSpellingKinds; ++Kind) { + if (Spellings[Kind].size() > 0) + return true; + } + return false; + } }; class DocumentationData { @@ -5246,6 +5260,16 @@ GetAttributeHeadingAndSpellings(const Record &Documentation, // documentation. This may not be a limiting factor since the spellings // should generally be consistently applied across the category. + if (Cat == "HLSL Semantics") { + if (!Attribute.getName().starts_with("HLSL")) + PrintFatalError(Attribute.getLoc(), + "HLSL semantic attribute name must start with HLSL"); + + assert(Attribute.getName().size() > 4); + std::string Name = Attribute.getName().substr(4).str(); + return std::make_pair(std::move(Name), SpellingList()); + } + std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(Attribute); if (Spellings.empty()) PrintFatalError(Attribute.getLoc(), @@ -5296,37 +5320,39 @@ static void WriteDocumentation(const RecordKeeper &Records, OS << ".. _" << Label << ":\n\n"; OS << Doc.Heading << "\n" << std::string(Doc.Heading.length(), '-') << "\n"; - // List what spelling syntaxes the attribute supports. - // Note: "#pragma clang attribute" is handled outside the spelling kinds loop - // so it must be last. - OS << ".. 
csv-table:: Supported Syntaxes\n"; - OS << " :header: \"GNU\", \"C++11\", \"C23\", \"``__declspec``\","; - OS << " \"Keyword\", \"``#pragma``\", \"HLSL Annotation\", \"``#pragma " - "clang "; - OS << "attribute``\"\n\n \""; - for (size_t Kind = 0; Kind != NumSpellingKinds; ++Kind) { - SpellingKind K = (SpellingKind)Kind; - // TODO: List Microsoft (IDL-style attribute) spellings once we fully - // support them. - if (K == SpellingKind::Microsoft) - continue; + if (Doc.SupportedSpellings.hasSpelling()) { + // List what spelling syntaxes the attribute supports. + // Note: "#pragma clang attribute" is handled outside the spelling kinds + // loop so it must be last. + OS << ".. csv-table:: Supported Syntaxes\n"; + OS << " :header: \"GNU\", \"C++11\", \"C23\", \"``__declspec``\","; + OS << " \"Keyword\", \"``#pragma``\", \"HLSL Annotation\", \"``#pragma " + "clang "; + OS << "attribute``\"\n\n \""; + for (size_t Kind = 0; Kind != NumSpellingKinds; ++Kind) { + SpellingKind K = (SpellingKind)Kind; + // TODO: List Microsoft (IDL-style attribute) spellings once we fully + // support them. + if (K == SpellingKind::Microsoft) + continue; - bool PrintedAny = false; - for (StringRef Spelling : Doc.SupportedSpellings[K]) { - if (PrintedAny) - OS << " |br| "; - OS << "``" << Spelling << "``"; - PrintedAny = true; + bool PrintedAny = false; + for (StringRef Spelling : Doc.SupportedSpellings[K]) { + if (PrintedAny) + OS << " |br| "; + OS << "``" << Spelling << "``"; + PrintedAny = true; + } + + OS << "\",\""; } - OS << "\",\""; + if (getPragmaAttributeSupport(Records).isAttributedSupported( + *Doc.Attribute)) + OS << "Yes"; + OS << "\"\n\n"; } - if (getPragmaAttributeSupport(Records).isAttributedSupported( - *Doc.Attribute)) - OS << "Yes"; - OS << "\"\n\n"; - // If the attribute is deprecated, print a message about it, and possibly // provide a replacement attribute. if (!Doc.Documentation->isValueUnset("Deprecated")) { |