aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--clang/docs/LanguageExtensions.rst160
-rw-r--r--clang/docs/ReleaseNotes.rst7
-rw-r--r--clang/include/clang/Basic/Attr.td15
-rw-r--r--clang/include/clang/Basic/AttrDocs.td15
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td4
-rw-r--r--clang/include/clang/Basic/Features.def2
-rw-r--r--clang/include/clang/Basic/LangOptions.h66
-rw-r--r--clang/include/clang/Basic/TargetInfo.h10
-rw-r--r--clang/include/clang/Basic/TargetOptions.h3
-rw-r--r--clang/include/clang/Driver/Options.td30
-rw-r--r--clang/lib/Basic/Targets/AMDGPU.cpp5
-rw-r--r--clang/lib/CodeGen/CGStmt.cpp5
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h42
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp3
-rw-r--r--clang/lib/CodeGen/CodeGenModule.h8
-rw-r--r--clang/lib/CodeGen/Targets/AMDGPU.cpp20
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp7
-rw-r--r--clang/lib/Sema/SemaStmtAttr.cpp34
-rw-r--r--clang/test/AST/ast-dump-atomic-options.hip136
-rw-r--r--clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c247
-rw-r--r--clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu195
-rw-r--r--clang/test/CodeGenCUDA/atomic-ops.cu200
-rw-r--r--clang/test/CodeGenCUDA/atomic-options.hip465
-rw-r--r--clang/test/CodeGenOpenCL/atomic-ops.cl20
-rw-r--r--clang/test/Driver/atomic-options.hip6
-rw-r--r--clang/test/Driver/hip-options.hip4
-rw-r--r--clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp10
-rw-r--r--clang/test/Parser/Inputs/cuda.h54
-rw-r--r--clang/test/Parser/atomic-options.hip75
29 files changed, 1438 insertions, 410 deletions
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 86295a3..578ee02 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -5442,6 +5442,166 @@ third argument, can only occur at file scope.
a = b[i] * c[i] + e;
}
+Extensions for controlling atomic code generation
+=================================================
+
+The ``[[clang::atomic]]`` statement attribute enables users to control how
+atomic operations are lowered in LLVM IR by conveying additional metadata to
+the backend. The primary goal is to allow users to specify certain options,
+like whether the affected atomic operations might be used with specific types of memory or
+whether to ignore denormal mode correctness in floating-point operations,
+without affecting the correctness of code that does not rely on these properties.
+
+In LLVM, lowering of atomic operations (e.g., ``atomicrmw``) can differ based
+on the target's capabilities. Some backends support native atomic instructions
+only for certain operation types or alignments, or only in specific memory
+regions. Likewise, floating-point atomic instructions may or may not respect
+IEEE denormal requirements. When the user is unconcerned about denormal-mode
+compliance (for performance reasons) or knows that certain atomic operations
+will not be performed on a particular type of memory, extra hints are needed to
+tell the backend how to proceed.
+
+A classic example is an architecture where floating-point atomic add does not
+fully conform to IEEE denormal-mode handling. If the user does not mind ignoring
+that aspect, they would prefer to emit a faster hardware atomic instruction,
+rather than a fallback or CAS loop. Conversely, on certain GPUs (e.g., AMDGPU),
+memory accessed via PCIe may only support a subset of atomic operations. To ensure
+correct and efficient lowering, the compiler must know whether the user needs
+the atomic operations to work with that type of memory.
+
+The allowed atomic attribute values are now ``remote_memory``, ``fine_grained_memory``,
+and ``ignore_denormal_mode``, each optionally prefixed with ``no_``. The meanings
+are as follows:
+
+- ``remote_memory`` means atomic operations may be performed on remote
+ memory, i.e. memory accessed through off-chip interconnects (e.g., PCIe).
+ On ROCm platforms using HIP, remote memory refers to memory accessed via
+ PCIe and is subject to specific atomic operation support. See
+ `ROCm PCIe Atomics <https://rocm.docs.amd.com/en/latest/conceptual/
+ pcie-atomics.html>`_ for further details. Prefixing with ``no_remote_memory`` indicates that
+ atomic operations should not be performed on remote memory.
+- ``fine_grained_memory`` means atomic operations may be performed on fine-grained
+ memory, i.e. memory regions that support fine-grained coherence, where updates to
+ memory are visible to other parts of the system even while modifications are ongoing.
+ For example, in HIP, fine-grained coherence ensures that host and device share
+ up-to-date data without explicit synchronization (see
+ `HIP Definition <https://rocm.docs.amd.com/projects/HIP/en/docs-6.3.3/how-to/hip_runtime_api/memory_management/coherence_control.html#coherence-control>`_).
+ Similarly, OpenCL 2.0 provides fine-grained synchronization in shared virtual memory
+ allocations, allowing concurrent modifications by host and device (see
+ `OpenCL 2.0 Overview <https://www.intel.com/content/www/us/en/developer/articles/technical/opencl-20-shared-virtual-memory-overview.html>`_).
+ Prefixing with ``no_fine_grained_memory`` indicates that atomic operations should not
+ be performed on fine-grained memory.
+- ``ignore_denormal_mode`` means that atomic operations are allowed to ignore
+ correctness for denormal mode in floating-point operations, potentially improving
+ performance on architectures that handle denormals inefficiently. The negated form,
+ if specified as ``no_ignore_denormal_mode``, would enforce strict denormal mode
+ correctness.
+
+Any unspecified option is inherited from the global defaults, which can be set
+by a compiler flag or the target's built-in defaults.
+
+Within the same atomic attribute, duplicate and conflicting values are accepted,
+and the last of any conflicting values wins. Multiple atomic attributes are
+allowed for the same compound statement, and the last atomic attribute wins.
+
+Without any atomic metadata, LLVM IR defaults to conservative settings for
+correctness: atomic operations enforce denormal mode correctness and are assumed
+to potentially use remote and fine-grained memory (i.e., the equivalent of
+``remote_memory``, ``fine_grained_memory``, and ``no_ignore_denormal_mode``).
+
+The attribute may be applied only to a compound statement and looks like:
+
+.. code-block:: c++
+
+ [[clang::atomic(remote_memory, fine_grained_memory, ignore_denormal_mode)]]
+ {
+ // Atomic instructions in this block carry extra metadata reflecting
+ // these user-specified options.
+ }
+
+A new compiler option now globally sets the defaults for these atomic-lowering
+options. The command-line format has changed to:
+
+.. code-block:: console
+
+ $ clang -fatomic-remote-memory -fno-atomic-fine-grained-memory -fatomic-ignore-denormal-mode file.cpp
+
+Each option has a corresponding flag:
+``-fatomic-remote-memory`` / ``-fno-atomic-remote-memory``,
+``-fatomic-fine-grained-memory`` / ``-fno-atomic-fine-grained-memory``,
+and ``-fatomic-ignore-denormal-mode`` / ``-fno-atomic-ignore-denormal-mode``.
+
+Code using the ``[[clang::atomic]]`` attribute can then selectively override
+the command-line defaults on a per-block basis. For instance:
+
+.. code-block:: c++
+
+ // Suppose the global defaults assume:
+ // remote_memory, fine_grained_memory, and no_ignore_denormal_mode
+ // (for conservative correctness)
+
+ void example() {
+ // Locally override the settings: disable remote_memory and enable
+ // fine_grained_memory.
+ [[clang::atomic(no_remote_memory, fine_grained_memory)]]
+ {
+ // In this block:
+ // - Atomic operations are not performed on remote memory.
+ // - Atomic operations are performed on fine-grained memory.
+ // - The setting for denormal mode remains as the global default
+ // (typically no_ignore_denormal_mode, enforcing strict denormal mode correctness).
+ // ...
+ }
+ }
+
+Function bodies do not accept statement attributes, so this will not work:
+
+.. code-block:: c++
+
+ void func() [[clang::atomic(remote_memory)]] { // Wrong: applies to function type
+ }
+
+Use the attribute on a compound statement within the function:
+
+.. code-block:: c++
+
+ void func() {
+ [[clang::atomic(remote_memory)]]
+ {
+ // Atomic operations in this block carry the specified metadata.
+ }
+ }
+
+The ``[[clang::atomic]]`` attribute affects only the code generation of atomic
+instructions within the annotated compound statement. Clang attaches target-specific
+metadata to those atomic instructions in the emitted LLVM IR to guide backend lowering.
+This metadata is fixed at the Clang code generation phase and is not modified by later
+LLVM passes (such as function inlining).
+
+For example, consider:
+
+.. code-block:: cpp
+
+ inline void func() {
+ [[clang::atomic(remote_memory)]]
+ {
+ // Atomic instructions lowered with metadata.
+ }
+ }
+
+ void foo() {
+ [[clang::atomic(no_remote_memory)]]
+ {
+ func(); // Inlined by LLVM, but the metadata from 'func()' remains unchanged.
+ }
+ }
+
+Although current usage focuses on AMDGPU, the mechanism is general. Other
+backends can ignore or implement their own responses to these flags if desired.
+If a target does not understand or enforce these hints, the IR remains valid,
+and the resulting program is still correct (although potentially less optimized
+for that user's needs).
+
Specifying an attribute for multiple declarations (#pragma clang attribute)
===========================================================================
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 307edf7..431a4c7 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -181,6 +181,13 @@ related warnings within the method body.
``format_matches`` accepts an example valid format string as its third
argument. For more information, see the Clang attributes documentation.
+- Introduced a new statement attribute ``[[clang::atomic]]`` that enables
+ fine-grained control over atomic code generation on a per-statement basis.
+ Supported options include ``[no_]remote_memory``,
+ ``[no_]fine_grained_memory``, and ``[no_]ignore_denormal_mode``. These are
+ particularly relevant for AMDGPU targets, where they map to corresponding IR
+ metadata.
+
Improvements to Clang's diagnostics
-----------------------------------
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 8bbd096..7cf15f5 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5001,3 +5001,18 @@ def NoTrivialAutoVarInit: InheritableAttr {
let Documentation = [NoTrivialAutoVarInitDocs];
let SimpleHandler = 1;
}
+
+def Atomic : StmtAttr {
+ let Spellings = [Clang<"atomic">];
+ let Args = [VariadicEnumArgument<"AtomicOptions", "ConsumedOption",
+ /*is_string=*/false,
+ ["remote_memory", "no_remote_memory",
+ "fine_grained_memory", "no_fine_grained_memory",
+ "ignore_denormal_mode", "no_ignore_denormal_mode"],
+ ["remote_memory", "no_remote_memory",
+ "fine_grained_memory", "no_fine_grained_memory",
+ "ignore_denormal_mode", "no_ignore_denormal_mode"]>];
+ let Subjects = SubjectList<[CompoundStmt], ErrorDiag, "compound statements">;
+ let Documentation = [AtomicDocs];
+ let StrictEnumParameters = 1;
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 79c615b..8fc7edc 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -8205,6 +8205,21 @@ for details.
}];
}
+def AtomicDocs : Documentation {
+ let Category = DocCatStmt;
+ let Content = [{
+The ``atomic`` attribute can be applied to *compound statements* to override or
+further specify the default atomic code-generation behavior, especially on
+targets such as AMDGPU. You can annotate compound statements with options
+to modify how atomic instructions inside that statement are emitted at the IR
+level.
+
+For details, see the documentation for `@atomic
+<http://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-controlling-atomic-code-generation>`_
+
+ }];
+}
+
def ClangRandomizeLayoutDocs : Documentation {
let Category = DocCatDecl;
let Heading = "randomize_layout, no_randomize_layout";
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 73b00752..12e2e47 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3286,6 +3286,10 @@ def err_invalid_branch_protection_spec : Error<
"invalid or misplaced branch protection specification '%0'">;
def warn_unsupported_branch_protection_spec : Warning<
"unsupported branch protection specification '%0'">, InGroup<BranchProtection>;
+def err_attribute_invalid_atomic_argument : Error<
+ "invalid argument '%0' to atomic attribute; valid options are: "
+ "'remote_memory', 'fine_grained_memory', 'ignore_denormal_mode' (optionally "
+ "prefixed with 'no_')">;
def warn_unsupported_target_attribute
: Warning<"%select{unsupported|duplicate|unknown}0%select{| CPU|"
diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def
index e736b46..92b1705 100644
--- a/clang/include/clang/Basic/Features.def
+++ b/clang/include/clang/Basic/Features.def
@@ -313,6 +313,8 @@ EXTENSION(datasizeof, LangOpts.CPlusPlus)
FEATURE(cxx_abi_relative_vtable, LangOpts.CPlusPlus && LangOpts.RelativeCXXABIVTables)
+FEATURE(clang_atomic_attributes, true)
+
// CUDA/HIP Features
FEATURE(cuda_noinline_keyword, LangOpts.CUDA)
EXTENSION(cuda_implicit_host_device_templates, LangOpts.CUDA && LangOpts.OffloadImplicitHostDeviceTemplates)
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 34d75af..e925e0f 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -630,6 +630,12 @@ public:
// WebAssembly target.
bool NoWasmOpt = false;
+ /// Atomic code-generation options.
+ /// These flags are set directly from the command-line options.
+ bool AtomicRemoteMemory = false;
+ bool AtomicFineGrainedMemory = false;
+ bool AtomicIgnoreDenormalMode = false;
+
LangOptions();
/// Set language defaults for the given input language and
@@ -1109,6 +1115,66 @@ inline void FPOptions::applyChanges(FPOptionsOverride FPO) {
*this = FPO.applyOverrides(*this);
}
+// The three atomic code-generation options.
+// The canonical (positive) names are:
+// "remote_memory", "fine_grained_memory", and "ignore_denormal_mode".
+// In attribute or command-line parsing, a token prefixed with "no_" inverts its
+// value.
+enum class AtomicOptionKind {
+ RemoteMemory, // enable remote memory.
+ FineGrainedMemory, // enable fine-grained memory.
+ IgnoreDenormalMode, // ignore floating-point denormals.
+ LANGOPT_ATOMIC_OPTION_LAST = IgnoreDenormalMode,
+};
+
+struct AtomicOptions {
+ // Bitfields for each option.
+ unsigned remote_memory : 1;
+ unsigned fine_grained_memory : 1;
+ unsigned ignore_denormal_mode : 1;
+
+ AtomicOptions()
+ : remote_memory(0), fine_grained_memory(0), ignore_denormal_mode(0) {}
+
+ AtomicOptions(const LangOptions &LO)
+ : remote_memory(LO.AtomicRemoteMemory),
+ fine_grained_memory(LO.AtomicFineGrainedMemory),
+ ignore_denormal_mode(LO.AtomicIgnoreDenormalMode) {}
+
+ bool getOption(AtomicOptionKind Kind) const {
+ switch (Kind) {
+ case AtomicOptionKind::RemoteMemory:
+ return remote_memory;
+ case AtomicOptionKind::FineGrainedMemory:
+ return fine_grained_memory;
+ case AtomicOptionKind::IgnoreDenormalMode:
+ return ignore_denormal_mode;
+ }
+ llvm_unreachable("Invalid AtomicOptionKind");
+ }
+
+ void setOption(AtomicOptionKind Kind, bool Value) {
+ switch (Kind) {
+ case AtomicOptionKind::RemoteMemory:
+ remote_memory = Value;
+ return;
+ case AtomicOptionKind::FineGrainedMemory:
+ fine_grained_memory = Value;
+ return;
+ case AtomicOptionKind::IgnoreDenormalMode:
+ ignore_denormal_mode = Value;
+ return;
+ }
+ llvm_unreachable("Invalid AtomicOptionKind");
+ }
+
+ LLVM_DUMP_METHOD void dump() const {
+ llvm::errs() << "\n remote_memory: " << remote_memory
+ << "\n fine_grained_memory: " << fine_grained_memory
+ << "\n ignore_denormal_mode: " << ignore_denormal_mode << "\n";
+ }
+};
+
/// Describes the kind of translation unit being processed.
enum TranslationUnitKind {
/// The translation unit is a complete translation unit.
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index db23afa..291cf26 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -301,6 +301,9 @@ protected:
// in function attributes in IR.
llvm::StringSet<> ReadOnlyFeatures;
+ // Default atomic options
+ AtomicOptions AtomicOpts;
+
public:
/// Construct a target for the given options.
///
@@ -1060,10 +1063,6 @@ public:
/// available on this target.
bool hasRISCVVTypes() const { return HasRISCVVTypes; }
- /// Returns whether or not the AMDGPU unsafe floating point atomics are
- /// allowed.
- bool allowAMDGPUUnsafeFPAtomics() const { return AllowAMDGPUUnsafeFPAtomics; }
-
/// For ARM targets returns a mask defining which coprocessors are configured
/// as Custom Datapath.
uint32_t getARMCDECoprocMask() const { return ARMCDECoprocMask; }
@@ -1699,6 +1698,9 @@ public:
return CC_C;
}
+ /// Get the default atomic options.
+ AtomicOptions getAtomicOpts() const { return AtomicOpts; }
+
enum CallingConvCheckResult {
CCCR_OK,
CCCR_Warning,
diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h
index 2049f03..51a2cfe 100644
--- a/clang/include/clang/Basic/TargetOptions.h
+++ b/clang/include/clang/Basic/TargetOptions.h
@@ -75,9 +75,6 @@ public:
/// address space.
bool NVPTXUseShortPointers = false;
- /// \brief If enabled, allow AMDGPU unsafe floating point atomics.
- bool AllowAMDGPUUnsafeFPAtomics = false;
-
/// \brief Code object version for AMDGPU.
llvm::CodeObjectVersionKind CodeObjectVersion =
llvm::CodeObjectVersionKind::COV_None;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e521cbf..883d6a9 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2278,6 +2278,24 @@ def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group<f_Group>,
Visibility<[ClangOption, CC1Option]>,
MarshallingInfoString<CodeGenOpts<"SymbolPartition">>;
+defm atomic_remote_memory : BoolFOption<"atomic-remote-memory",
+ LangOpts<"AtomicRemoteMemory">, DefaultFalse,
+ PosFlag<SetTrue, [], [ClangOption, CC1Option], "May have">,
+ NegFlag<SetFalse, [], [ClangOption], "Assume no">,
+ BothFlags<[], [ClangOption], " atomic operations on remote memory">>;
+
+defm atomic_fine_grained_memory : BoolFOption<"atomic-fine-grained-memory",
+ LangOpts<"AtomicFineGrainedMemory">, DefaultFalse,
+ PosFlag<SetTrue, [], [ClangOption, CC1Option], "May have">,
+ NegFlag<SetFalse, [], [ClangOption], "Assume no">,
+ BothFlags<[], [ClangOption], " atomic operations on fine-grained memory">>;
+
+defm atomic_ignore_denormal_mode : BoolFOption<"atomic-ignore-denormal-mode",
+ LangOpts<"AtomicIgnoreDenormalMode">, DefaultFalse,
+ PosFlag<SetTrue, [], [ClangOption, CC1Option], "Allow">,
+ NegFlag<SetFalse, [], [ClangOption], "Disallow">,
+ BothFlags<[], [ClangOption], " atomic operations to ignore denormal mode">>;
+
defm memory_profile : OptInCC1FFlag<"memory-profile", "Enable", "Disable", " heap memory profiling">;
def fmemory_profile_EQ : Joined<["-"], "fmemory-profile=">,
Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
@@ -5154,14 +5172,10 @@ defm amdgpu_precise_memory_op
: SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable",
" precise memory mode (AMDGPU only)">;
-defm unsafe_fp_atomics : BoolMOption<"unsafe-fp-atomics",
- TargetOpts<"AllowAMDGPUUnsafeFPAtomics">, DefaultFalse,
- PosFlag<SetTrue, [], [ClangOption, CC1Option],
- "Enable generation of unsafe floating point "
- "atomic instructions. May generate more efficient code, but may not "
- "respect rounding and denormal modes, and may give incorrect results "
- "for certain memory destinations. (AMDGPU only)">,
- NegFlag<SetFalse>>;
+def munsafe_fp_atomics : Flag<["-"], "munsafe-fp-atomics">,
+ Visibility<[ClangOption, CC1Option]>, Alias<fatomic_ignore_denormal_mode>;
+def mno_unsafe_fp_atomics : Flag<["-"], "mno-unsafe-fp-atomics">,
+ Visibility<[ClangOption]>, Alias<fno_atomic_ignore_denormal_mode>;
def faltivec : Flag<["-"], "faltivec">, Group<f_Group>;
def fno_altivec : Flag<["-"], "fno-altivec">, Group<f_Group>;
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 228f967..a42b458 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -248,7 +248,6 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
HasLegalHalfType = true;
HasFloat16 = true;
WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
- AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
// Set pointer width and alignment for the generic address space.
PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
@@ -273,6 +272,8 @@ void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
// to OpenCL can be removed from the following line.
setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
!isAMDGCN(getTriple()));
+
+ AtomicOpts = AtomicOptions(Opts);
}
llvm::SmallVector<Builtin::InfosShard>
@@ -330,7 +331,7 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
}
}
- if (AllowAMDGPUUnsafeFPAtomics)
+ if (Opts.AtomicIgnoreDenormalMode)
Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
// TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 7368df1..e56ba6c 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -786,6 +786,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
HLSLControlFlowHintAttr::Spelling flattenOrBranch =
HLSLControlFlowHintAttr::SpellingNotCalculated;
const CallExpr *musttail = nullptr;
+ const AtomicAttr *AA = nullptr;
for (const auto *A : S.getAttrs()) {
switch (A->getKind()) {
@@ -816,6 +817,9 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
Builder.CreateAssumption(AssumptionVal);
}
} break;
+ case attr::Atomic:
+ AA = cast<AtomicAttr>(A);
+ break;
case attr::HLSLControlFlowHint: {
flattenOrBranch = cast<HLSLControlFlowHintAttr>(A)->getSemanticSpelling();
} break;
@@ -827,6 +831,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
SaveAndRestore save_noconvergent(InNoConvergentAttributedStmt, noconvergent);
SaveAndRestore save_musttail(MustTailCall, musttail);
SaveAndRestore save_flattenOrBranch(HLSLControlFlowAttr, flattenOrBranch);
+ CGAtomicOptionsRAII AORAII(CGM, AA);
EmitStmt(S.getSubStmt(), S.getAttrs());
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 8c5362b..7c0d6c3 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -834,6 +834,48 @@ public:
};
FPOptions CurFPFeatures;
+ class CGAtomicOptionsRAII {
+ public:
+ CGAtomicOptionsRAII(CodeGenModule &CGM_, AtomicOptions AO)
+ : CGM(CGM_), SavedAtomicOpts(CGM.getAtomicOpts()) {
+ CGM.setAtomicOpts(AO);
+ }
+ CGAtomicOptionsRAII(CodeGenModule &CGM_, const AtomicAttr *AA)
+ : CGM(CGM_), SavedAtomicOpts(CGM.getAtomicOpts()) {
+ if (!AA)
+ return;
+ AtomicOptions AO = SavedAtomicOpts;
+ for (auto Option : AA->atomicOptions()) {
+ switch (Option) {
+ case AtomicAttr::remote_memory:
+ AO.remote_memory = true;
+ break;
+ case AtomicAttr::no_remote_memory:
+ AO.remote_memory = false;
+ break;
+ case AtomicAttr::fine_grained_memory:
+ AO.fine_grained_memory = true;
+ break;
+ case AtomicAttr::no_fine_grained_memory:
+ AO.fine_grained_memory = false;
+ break;
+ case AtomicAttr::ignore_denormal_mode:
+ AO.ignore_denormal_mode = true;
+ break;
+ case AtomicAttr::no_ignore_denormal_mode:
+ AO.ignore_denormal_mode = false;
+ break;
+ }
+ }
+ CGM.setAtomicOpts(AO);
+ }
+ ~CGAtomicOptionsRAII() { CGM.setAtomicOpts(SavedAtomicOpts); }
+
+ private:
+ CodeGenModule &CGM;
+ AtomicOptions SavedAtomicOpts;
+ };
+
public:
/// ObjCEHValueStack - Stack of Objective-C exception values, used for
/// rethrows.
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 1b7d0ac..3caa79b 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -342,7 +342,8 @@ CodeGenModule::CodeGenModule(ASTContext &C,
PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags),
Target(C.getTargetInfo()), ABI(createCXXABI(*this)),
VMContext(M.getContext()), VTables(*this), StackHandler(diags),
- SanitizerMD(new SanitizerMetadata(*this)) {
+ SanitizerMD(new SanitizerMetadata(*this)),
+ AtomicOpts(Target.getAtomicOpts()) {
// Initialize the type cache.
Types.reset(new CodeGenTypes(*this));
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index c6f6fd5..4a269f6 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -676,6 +676,8 @@ private:
std::optional<PointerAuthQualifier>
computeVTPointerAuthentication(const CXXRecordDecl *ThisClass);
+ AtomicOptions AtomicOpts;
+
public:
CodeGenModule(ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
const HeaderSearchOptions &headersearchopts,
@@ -691,6 +693,12 @@ public:
/// Finalize LLVM code generation.
void Release();
+ /// Get the current Atomic options.
+ AtomicOptions getAtomicOpts() { return AtomicOpts; }
+
+ /// Set the current Atomic options.
+ void setAtomicOpts(AtomicOptions AO) { AtomicOpts = AO; }
+
/// Return true if we should emit location information for expressions.
bool getExpressionLocationsEnabled() const;
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index dc45def..9d29f31 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -585,19 +585,19 @@ void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
AtomicInst.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
}
- if (!RMW || !CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
+ if (!RMW)
return;
- // TODO: Introduce new, more controlled options that also work for integers,
- // and deprecate allowAMDGPUUnsafeFPAtomics.
- llvm::AtomicRMWInst::BinOp RMWOp = RMW->getOperation();
- if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) {
- llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
+ AtomicOptions AO = CGF.CGM.getAtomicOpts();
+ llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
+ if (!AO.getOption(clang::AtomicOptionKind::FineGrainedMemory))
RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);
-
- if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW->getType()->isFloatTy())
- RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty);
- }
+ if (!AO.getOption(clang::AtomicOptionKind::RemoteMemory))
+ RMW->setMetadata("amdgpu.no.remote.memory", Empty);
+ if (AO.getOption(clang::AtomicOptionKind::IgnoreDenormalMode) &&
+ RMW->getOperation() == llvm::AtomicRMWInst::FAdd &&
+ RMW->getType()->isFloatTy())
+ RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty);
}
bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index aa83e3e..86db3f7 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5998,6 +5998,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA);
+ Args.addOptInFlag(CmdArgs, options::OPT_fatomic_remote_memory,
+ options::OPT_fno_atomic_remote_memory);
+ Args.addOptInFlag(CmdArgs, options::OPT_fatomic_fine_grained_memory,
+ options::OPT_fno_atomic_fine_grained_memory);
+ Args.addOptInFlag(CmdArgs, options::OPT_fatomic_ignore_denormal_mode,
+ options::OPT_fno_atomic_ignore_denormal_mode);
+
if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) {
const llvm::Triple::ArchType Arch = TC.getArch();
if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp
index 422d8ab..2f719c6 100644
--- a/clang/lib/Sema/SemaStmtAttr.cpp
+++ b/clang/lib/Sema/SemaStmtAttr.cpp
@@ -625,6 +625,38 @@ static Attr *handleHLSLControlFlowHint(Sema &S, Stmt *St, const ParsedAttr &A,
return ::new (S.Context) HLSLControlFlowHintAttr(S.Context, A);
}
+static Attr *handleAtomicAttr(Sema &S, Stmt *St, const ParsedAttr &AL,
+ SourceRange Range) {
+ if (!AL.checkAtLeastNumArgs(S, 1))
+ return nullptr;
+
+ SmallVector<AtomicAttr::ConsumedOption, 6> Options;
+ for (unsigned ArgIndex = 0; ArgIndex < AL.getNumArgs(); ++ArgIndex) {
+ AtomicAttr::ConsumedOption Option;
+ StringRef OptionString;
+ SourceLocation Loc;
+
+ if (!AL.isArgIdent(ArgIndex)) {
+ S.Diag(AL.getArgAsExpr(ArgIndex)->getBeginLoc(),
+ diag::err_attribute_argument_type)
+ << AL << AANT_ArgumentIdentifier;
+ return nullptr;
+ }
+
+ IdentifierLoc *Ident = AL.getArgAsIdent(ArgIndex);
+ OptionString = Ident->Ident->getName();
+ Loc = Ident->Loc;
+ if (!AtomicAttr::ConvertStrToConsumedOption(OptionString, Option)) {
+ S.Diag(Loc, diag::err_attribute_invalid_atomic_argument) << OptionString;
+ return nullptr;
+ }
+ Options.push_back(Option);
+ }
+
+ return ::new (S.Context)
+ AtomicAttr(S.Context, AL, Options.data(), Options.size());
+}
+
static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
SourceRange Range) {
if (A.isInvalid() || A.getKind() == ParsedAttr::IgnoredAttribute)
@@ -685,6 +717,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
return handleNoConvergentAttr(S, St, A, Range);
case ParsedAttr::AT_Annotate:
return S.CreateAnnotationAttr(A);
+ case ParsedAttr::AT_Atomic:
+ return handleAtomicAttr(S, St, A, Range);
default:
if (Attr *AT = nullptr; A.getInfo().handleStmtAttribute(S, St, A, AT) !=
ParsedAttrInfo::NotHandled) {
diff --git a/clang/test/AST/ast-dump-atomic-options.hip b/clang/test/AST/ast-dump-atomic-options.hip
new file mode 100644
index 0000000..f34f592
--- /dev/null
+++ b/clang/test/AST/ast-dump-atomic-options.hip
@@ -0,0 +1,136 @@
+// RUN: %clang_cc1 -ast-dump %s | FileCheck %s
+// RUN: %clang_cc1 -ast-dump -fcuda-is-device %s | FileCheck %s
+// RUN: %clang_cc1 -ast-dump -fcuda-is-device %s \
+// RUN: -fatomic-fine-grained-memory -fatomic-ignore-denormal-mode \
+// RUN: | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK-LABEL: FunctionDecl {{.*}} test_default
+// CHECK-NOT: AttributedStmt
+// CHECK-NOT: AtomicAttr
+// CHECK: CompoundStmt
+// CHECK-NEXT: `-AtomicExpr
+__device__ __host__ void test_default(float *a) {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+}
+
+// CHECK-LABEL: FunctionDecl {{.*}} test_one
+// CHECK: `-AttributedStmt
+// CHECK-NEXT: |-AtomicAttr {{.*}} no_remote_memory{{$}}
+// CHECK-NEXT: `-CompoundStmt
+// CHECK-NEXT: `-AtomicExpr
+__device__ __host__ void test_one(float *a) {
+ [[clang::atomic(no_remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// CHECK-LABEL: FunctionDecl {{.*}} test_two
+// CHECK: `-AttributedStmt
+// CHECK-NEXT: |-AtomicAttr {{.*}} remote_memory ignore_denormal_mode{{$}}
+// CHECK-NEXT: `-CompoundStmt
+// CHECK-NEXT: `-AtomicExpr
+__device__ __host__ void test_two(float *a) {
+ [[clang::atomic(remote_memory, ignore_denormal_mode)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// CHECK-LABEL: FunctionDecl {{.*}} test_three
+// CHECK: `-AttributedStmt
+// CHECK-NEXT: |-AtomicAttr {{.*}} no_remote_memory fine_grained_memory no_ignore_denormal_mode{{$}}
+// CHECK-NEXT: `-CompoundStmt
+// CHECK-NEXT: `-AtomicExpr
+__device__ __host__ void test_three(float *a) {
+ [[clang::atomic(no_remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// CHECK-LABEL: FunctionDecl {{.*}} test_duplicate
+// CHECK: `-AttributedStmt
+// CHECK-NEXT: |-AtomicAttr {{.*}} no_remote_memory{{$}}
+// CHECK-NEXT: `-CompoundStmt
+// CHECK-NEXT: `-AtomicExpr
+__device__ __host__ void test_duplicate(float *a) {
+ [[clang::atomic(no_remote_memory, no_remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// CHECK-LABEL: FunctionDecl {{.*}} test_conflict
+// CHECK: `-AttributedStmt
+// CHECK-NEXT: |-AtomicAttr {{.*}} remote_memory{{$}}
+// CHECK-NEXT: `-CompoundStmt
+// CHECK-NEXT: `-AtomicExpr
+__device__ __host__ void test_conflict(float *a) {
+ [[clang::atomic(no_remote_memory, remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// CHECK-LABEL: FunctionDecl {{.*}} test_multiple_attrs
+// CHECK: `-AttributedStmt
+// CHECK-NEXT: |-AtomicAttr {{.*}} no_remote_memory{{$}}
+// CHECK-NEXT: |-AtomicAttr {{.*}} remote_memory{{$}}
+// CHECK-NEXT: `-CompoundStmt
+// CHECK-NEXT: `-AtomicExpr
+__device__ __host__ void test_multiple_attrs(float *a) {
+ [[clang::atomic(no_remote_memory)]] [[clang::atomic(remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// CHECK-LABEL: FunctionDecl {{.*}} test_nested
+// CHECK: CompoundStmt
+// CHECK: |-AtomicExpr
+// CHECK: `-AttributedStmt
+// CHECK-NEXT: |-AtomicAttr {{.*}} remote_memory fine_grained_memory no_ignore_denormal_mode{{$}}
+// CHECK-NEXT: `-CompoundStmt
+// CHECK: |-AtomicExpr
+// CHECK: |-AttributedStmt
+// CHECK-NEXT: |-AtomicAttr {{.*}} no_remote_memory{{$}}
+// CHECK-NEXT: `-CompoundStmt
+// CHECK-NEXT: `-AtomicExpr
+// CHECK: `-AttributedStmt
+// CHECK-NEXT: |-AtomicAttr {{.*}} no_fine_grained_memory{{$}}
+// CHECK-NEXT: `-CompoundStmt
+// CHECK-NEXT: `-AtomicExpr
+__device__ __host__ void test_nested(float *a) {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ [[clang::atomic(remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] {
+ __scoped_atomic_fetch_max(a, 2, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
+ [[clang::atomic(no_remote_memory)]] {
+ __scoped_atomic_fetch_min(a, 3, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP);
+ }
+ [[clang::atomic(no_fine_grained_memory)]] {
+ __scoped_atomic_fetch_sub(a, 4, __ATOMIC_RELEASE, __MEMORY_SCOPE_WVFRNT);
+ }
+ }
+}
+
+// CHECK-LABEL: FunctionTemplateDecl {{.*}} test_template
+// CHECK: |-FunctionDecl {{.*}} test_template 'void (T *)'
+// CHECK: | |-CompoundStmt
+// CHECK: | | `-AttributedStmt
+// CHECK: | | |-AtomicAttr {{.*}} no_remote_memory fine_grained_memory no_ignore_denormal_mode{{$}}
+// CHECK: | | `-CompoundStmt
+// CHECK: | | `-CallExpr {{.*}} '<dependent type>'
+// CHECK: `-FunctionDecl {{.*}} used test_template 'void (float *)' implicit_instantiation
+// CHECK: |-CompoundStmt
+// CHECK: | `-AttributedStmt
+// CHECK: | |-AtomicAttr {{.*}} no_remote_memory fine_grained_memory no_ignore_denormal_mode{{$}}
+// CHECK: | `-CompoundStmt
+// CHECK: | `-AtomicExpr {{.*}} 'float'
+template<typename T>
+__device__ __host__ void test_template(T *a) {
+ [[clang::atomic(no_remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+__device__ __host__ void test_template_caller() {
+ float *p;
+ test_template(p);
+}
diff --git a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
index a8fb989..d744703 100644
--- a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
+++ b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
@@ -7,7 +7,7 @@
// SAFE-NEXT: [[ENTRY:.*:]]
// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory [[META3]]
// SAFE-NEXT: ret float [[TMP0]]
//
// UNSAFE-LABEL: define dso_local float @test_float_post_inc(
@@ -15,7 +15,7 @@
// UNSAFE-NEXT: [[ENTRY:.*:]]
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
// UNSAFE-NEXT: ret float [[TMP0]]
//
float test_float_post_inc()
@@ -24,21 +24,13 @@ float test_float_post_inc()
return n++;
}
-// SAFE-LABEL: define dso_local float @test_float_post_dc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4
-// SAFE-NEXT: ret float [[TMP0]]
-//
-// UNSAFE-LABEL: define dso_local float @test_float_post_dc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: ret float [[TMP0]]
+// CHECK-LABEL: define dso_local float @test_float_post_dc(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: ret float [[TMP0]]
//
float test_float_post_dc()
{
@@ -46,23 +38,14 @@ float test_float_post_dc()
return n--;
}
-// SAFE-LABEL: define dso_local float @test_float_pre_dc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4
-// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
-// SAFE-NEXT: ret float [[TMP1]]
-//
-// UNSAFE-LABEL: define dso_local float @test_float_pre_dc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
-// UNSAFE-NEXT: ret float [[TMP1]]
+// CHECK-LABEL: define dso_local float @test_float_pre_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// CHECK-NEXT: ret float [[TMP1]]
//
float test_float_pre_dc()
{
@@ -75,7 +58,7 @@ float test_float_pre_dc()
// SAFE-NEXT: [[ENTRY:.*:]]
// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
// SAFE-NEXT: ret float [[TMP1]]
//
@@ -84,7 +67,7 @@ float test_float_pre_dc()
// UNSAFE-NEXT: [[ENTRY:.*:]]
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
// UNSAFE-NEXT: ret float [[TMP1]]
//
@@ -94,21 +77,13 @@ float test_float_pre_inc()
return ++n;
}
-// SAFE-LABEL: define dso_local double @test_double_post_inc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), double 1.000000e+00 seq_cst, align 8
-// SAFE-NEXT: ret double [[TMP0]]
-//
-// UNSAFE-LABEL: define dso_local double @test_double_post_inc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: ret double [[TMP0]]
+// CHECK-LABEL: define dso_local double @test_double_post_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: ret double [[TMP0]]
//
double test_double_post_inc()
{
@@ -116,21 +91,13 @@ double test_double_post_inc()
return n++;
}
-// SAFE-LABEL: define dso_local double @test_double_post_dc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), double 1.000000e+00 seq_cst, align 8
-// SAFE-NEXT: ret double [[TMP0]]
-//
-// UNSAFE-LABEL: define dso_local double @test_double_post_dc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: ret double [[TMP0]]
+// CHECK-LABEL: define dso_local double @test_double_post_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: ret double [[TMP0]]
//
double test_double_post_dc()
{
@@ -138,23 +105,14 @@ double test_double_post_dc()
return n--;
}
-// SAFE-LABEL: define dso_local double @test_double_pre_dc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), double 1.000000e+00 seq_cst, align 8
-// SAFE-NEXT: [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00
-// SAFE-NEXT: ret double [[TMP1]]
-//
-// UNSAFE-LABEL: define dso_local double @test_double_pre_dc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00
-// UNSAFE-NEXT: ret double [[TMP1]]
+// CHECK-LABEL: define dso_local double @test_double_pre_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00
+// CHECK-NEXT: ret double [[TMP1]]
//
double test_double_pre_dc()
{
@@ -162,23 +120,14 @@ double test_double_pre_dc()
return --n;
}
-// SAFE-LABEL: define dso_local double @test_double_pre_inc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), double 1.000000e+00 seq_cst, align 8
-// SAFE-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
-// SAFE-NEXT: ret double [[TMP1]]
-//
-// UNSAFE-LABEL: define dso_local double @test_double_pre_inc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
-// UNSAFE-NEXT: ret double [[TMP1]]
+// CHECK-LABEL: define dso_local double @test_double_pre_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
+// CHECK-NEXT: ret double [[TMP1]]
//
double test_double_pre_inc()
{
@@ -186,21 +135,13 @@ double test_double_pre_inc()
return ++n;
}
-// SAFE-LABEL: define dso_local half @test__Float16_post_inc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), half 0xH3C00 seq_cst, align 2
-// SAFE-NEXT: ret half [[TMP0]]
-//
-// UNSAFE-LABEL: define dso_local half @test__Float16_post_inc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: ret half [[TMP0]]
+// CHECK-LABEL: define dso_local half @test__Float16_post_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: ret half [[TMP0]]
//
_Float16 test__Float16_post_inc()
{
@@ -208,21 +149,13 @@ _Float16 test__Float16_post_inc()
return n++;
}
-// SAFE-LABEL: define dso_local half @test__Float16_post_dc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), half 0xH3C00 seq_cst, align 2
-// SAFE-NEXT: ret half [[TMP0]]
-//
-// UNSAFE-LABEL: define dso_local half @test__Float16_post_dc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: ret half [[TMP0]]
+// CHECK-LABEL: define dso_local half @test__Float16_post_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: ret half [[TMP0]]
//
_Float16 test__Float16_post_dc()
{
@@ -230,23 +163,14 @@ _Float16 test__Float16_post_dc()
return n--;
}
-// SAFE-LABEL: define dso_local half @test__Float16_pre_dc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), half 0xH3C00 seq_cst, align 2
-// SAFE-NEXT: [[TMP1:%.*]] = fsub half [[TMP0]], 0xH3C00
-// SAFE-NEXT: ret half [[TMP1]]
-//
-// UNSAFE-LABEL: define dso_local half @test__Float16_pre_dc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: [[TMP1:%.*]] = fsub half [[TMP0]], 0xH3C00
-// UNSAFE-NEXT: ret half [[TMP1]]
+// CHECK-LABEL: define dso_local half @test__Float16_pre_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: [[TMP1:%.*]] = fsub half [[TMP0]], 0xH3C00
+// CHECK-NEXT: ret half [[TMP1]]
//
_Float16 test__Float16_pre_dc()
{
@@ -254,23 +178,14 @@ _Float16 test__Float16_pre_dc()
return --n;
}
-// SAFE-LABEL: define dso_local half @test__Float16_pre_inc(
-// SAFE-SAME: ) #[[ATTR0]] {
-// SAFE-NEXT: [[ENTRY:.*:]]
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), half 0xH3C00 seq_cst, align 2
-// SAFE-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], 0xH3C00
-// SAFE-NEXT: ret half [[TMP1]]
-//
-// UNSAFE-LABEL: define dso_local half @test__Float16_pre_inc(
-// UNSAFE-SAME: ) #[[ATTR0]] {
-// UNSAFE-NEXT: [[ENTRY:.*:]]
-// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
-// UNSAFE-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], 0xH3C00
-// UNSAFE-NEXT: ret half [[TMP1]]
+// CHECK-LABEL: define dso_local half @test__Float16_pre_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// CHECK-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], 0xH3C00
+// CHECK-NEXT: ret half [[TMP1]]
//
_Float16 test__Float16_pre_inc()
{
@@ -278,7 +193,7 @@ _Float16 test__Float16_pre_inc()
return ++n;
}
//.
+// SAFE: [[META3]] = !{}
+//.
// UNSAFE: [[META3]] = !{}
//.
-//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-// CHECK: {{.*}}
diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 37fca61..22c40e6 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -1,19 +1,19 @@
// RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
-// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefixes=CHECK,SAFEIR %s
+// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefixes=FUN,CHECK,SAFEIR %s
// RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
-// RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics | FileCheck -check-prefixes=CHECK,UNSAFEIR %s
+// RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics | FileCheck -check-prefixes=FUN,CHECK,UNSAFEIR %s
// RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \
-// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s
+// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefixes=FUN,SAFE %s
// RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx942 -fnative-half-type \
// RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics \
-// RUN: | FileCheck -check-prefix=UNSAFE %s
+// RUN: | FileCheck -check-prefixes=FUN,UNSAFE %s
// REQUIRES: amdgpu-registered-target
@@ -21,34 +21,32 @@
#include <stdatomic.h>
__global__ void ffp1(float *p) {
- // CHECK-LABEL: @_Z4ffp1Pf
- // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4{{$}}
- // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE:[0-9]+]]{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
-
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE:[0-9]+]], !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
-
- // SAFE: _Z4ffp1Pf
- // SAFE: global_atomic_cmpswap
- // SAFE: global_atomic_cmpswap
- // SAFE: global_atomic_cmpswap
- // SAFE: global_atomic_cmpswap
- // SAFE: global_atomic_cmpswap
+ // FUN-LABEL: @_Z4ffp1Pf
+ // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, [[DEFMD:!amdgpu.no.fine.grained.memory ![0-9]+, !amdgpu.no.remote.memory ![0-9]+$]]
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, [[DEFMD]]
+ // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, [[DEFMD]]
+ // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, [[DEFMD]]
+ // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE:[0-9]+]], [[DEFMD]]
+ // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, [[FADDMD:!amdgpu.no.fine.grained.memory ![0-9]+, !amdgpu.no.remote.memory ![0-9]+, !amdgpu.ignore.denormal.mode ![0-9]+$]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, [[DEFMD:!amdgpu.no.fine.grained.memory ![0-9]+, !amdgpu.no.remote.memory ![0-9]+$]]
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE:[0-9]+]], [[FADDMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+
+ // SAFE: global_atomic_add_f32
// SAFE: global_atomic_cmpswap
+ // SAFE: global_atomic_max
+ // SAFE: global_atomic_min
+ // SAFE: global_atomic_max
+ // SAFE: global_atomic_min
- // UNSAFE: _Z4ffp1Pf
// UNSAFE: global_atomic_add_f32
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap
@@ -68,26 +66,25 @@ __global__ void ffp1(float *p) {
}
__global__ void ffp2(double *p) {
- // CHECK-LABEL: @_Z4ffp2Pd
- // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
-
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
-
- // SAFE-LABEL: @_Z4ffp2Pd
+ // FUN-LABEL: @_Z4ffp2Pd
+ // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
@@ -95,7 +92,6 @@ __global__ void ffp2(double *p) {
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
- // UNSAFE-LABEL: @_Z4ffp2Pd
// UNSAFE: global_atomic_add_f64
// UNSAFE: global_atomic_cmpswap_x2
// UNSAFE: global_atomic_max_f64
@@ -114,32 +110,31 @@ __global__ void ffp2(double *p) {
// long double is the same as double for amdgcn.
__global__ void ffp3(long double *p) {
- // CHECK-LABEL: @_Z4ffp3Pe
- // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
- // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
-
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
-
- // SAFE-LABEL: @_Z4ffp3Pe
+ // FUN-LABEL: @_Z4ffp3Pe
+ // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
- // UNSAFE-LABEL: @_Z4ffp3Pe
+
// UNSAFE: global_atomic_cmpswap_x2
// UNSAFE: global_atomic_max_f64
// UNSAFE: global_atomic_min_f64
@@ -156,48 +151,48 @@ __global__ void ffp3(long double *p) {
}
__device__ double ffp4(double *p, float f) {
- // CHECK-LABEL: @_Z4ffp4Pdf
+ // FUN-LABEL: @_Z4ffp4Pdf
// CHECK: fpext contract float {{.*}} to double
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
__atomic_fetch_sub(p, f, memory_order_relaxed);
return __hip_atomic_fetch_sub(p, f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
}
__device__ double ffp5(double *p, int i) {
- // CHECK-LABEL: @_Z4ffp5Pdi
+ // FUN-LABEL: @_Z4ffp5Pdi
// CHECK: sitofp i32 {{.*}} to double
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
__atomic_fetch_sub(p, i, memory_order_relaxed);
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
return __hip_atomic_fetch_sub(p, i, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
}
__global__ void ffp6(_Float16 *p) {
- // CHECK-LABEL: @_Z4ffp6PDF16
- // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2{{$}}
- // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
- // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
-
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
- // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+ // FUN-LABEL: @_Z4ffp6PDF16
+ // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, [[DEFMD]]
+ // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, [[DEFMD]]
+ // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, [[DEFMD]]
+ // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, [[DEFMD]]
+ // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, [[DEFMD]]
+ // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
+ // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], [[DEFMD]]
// SAFE: _Z4ffp6PDF16
// SAFE: global_atomic_cmpswap
diff --git a/clang/test/CodeGenCUDA/atomic-ops.cu b/clang/test/CodeGenCUDA/atomic-ops.cu
index d8489b4..a41e6a6 100644
--- a/clang/test/CodeGenCUDA/atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/atomic-ops.cu
@@ -4,14 +4,14 @@
// CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii
// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK:[0-9]+]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD:!amdgpu.no.fine.grained.memory ![0-9]+, !amdgpu.no.remote.memory ![0-9]+$]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("singlethread") monotonic, align 4{{$}}
__device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
@@ -31,8 +31,8 @@ __device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z25atomicu32_op_singlethreadPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
__device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -42,14 +42,14 @@ __device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned in
// CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii
// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("wavefront") monotonic, align 4{{$}}
__device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
@@ -69,8 +69,8 @@ __device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z22atomicu32_op_wavefrontPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
__device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -80,14 +80,14 @@ __device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int v
// CHECK-LABEL: @_Z21atomic32_op_workgroupPiii
// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("workgroup") monotonic, align 4{{$}}
__device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -105,8 +105,8 @@ __device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z22atomicu32_op_workgroupPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
__device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -116,14 +116,14 @@ __device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int v
// CHECK-LABEL: @_Z17atomic32_op_agentPiii
// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("agent") monotonic, align 4{{$}}
__device__ int atomic32_op_agent(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -141,8 +141,8 @@ __device__ int atomic32_op_agent(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z18atomicu32_op_agentPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
__device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -152,14 +152,14 @@ __device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val,
// CHECK-LABEL: @_Z18atomic32_op_systemPiii
// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: load i32, ptr %{{.*}}, align 4{{$}}
// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} monotonic, align 4{{$}}
__device__ int atomic32_op_system(int *ptr, int val, int desired) {
@@ -179,8 +179,8 @@ __device__ int atomic32_op_system(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z19atomicu32_op_systemPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} monotonic, align 4, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
__device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -190,14 +190,14 @@ __device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val,
// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxS_xx
// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread") monotonic, align 8{{$}}
__device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -215,8 +215,8 @@ __device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, l
}
// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: load atomic i64, ptr %{{.*}} syncscope("singlethread") monotonic, align 8{{$}}
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread") monotonic, align 8{{$}}
__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
@@ -230,14 +230,14 @@ __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr,
// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxS_xx
// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront") monotonic, align 8{{$}}
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront") monotonic, align 8{{$}}
__device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long long val, long long desired) {
@@ -257,8 +257,8 @@ __device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long
}
// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront") monotonic, align 8{{$}}
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront") monotonic, align 8{{$}}
__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
@@ -272,14 +272,14 @@ __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, un
// CHECK-LABEL: @_Z21atomic64_op_workgroupPxS_xx
// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup") monotonic, align 8{{$}}
__device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -297,8 +297,8 @@ __device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long
}
// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup") monotonic, align 8{{$}}
__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -310,14 +310,14 @@ __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, un
// CHECK-LABEL: @_Z17atomic64_op_agentPxS_xx
// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent") monotonic, align 8{{$}}
__device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -335,8 +335,8 @@ __device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long lon
}
// CHECK-LABEL: @_Z18atomicu64_op_agentPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent") monotonic, align 8{{$}}
__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -348,14 +348,14 @@ __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsign
// CHECK-LABEL: @_Z18atomic64_op_systemPxS_xx
// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: load i64, ptr %{{.*}}, align 8
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} monotonic, align 8{{$}}
__device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long long val, long long desired) {
@@ -375,8 +375,8 @@ __device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long lo
}
// CHECK-LABEL: @_Z19atomicu64_op_systemPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]]{{$}}
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} monotonic, align 8, !noalias.addrspace ![[$NOALIAS_ADDRSPACE_STACK]], [[$DEFMD]]
// CHECK: load i64, ptr %{{.*}}, align 8
// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} monotonic, align 8{{$}}
__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
diff --git a/clang/test/CodeGenCUDA/atomic-options.hip b/clang/test/CodeGenCUDA/atomic-options.hip
new file mode 100644
index 0000000..dbc8c31
--- /dev/null
+++ b/clang/test/CodeGenCUDA/atomic-options.hip
@@ -0,0 +1,465 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=DEV %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN: -fatomic-fine-grained-memory -fatomic-ignore-denormal-mode \
+// RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=OPT %s
+
+#include "Inputs/cuda.h"
+
+// HOST-LABEL: define dso_local void @_Z12test_defaultPf(
+// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
+// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
+// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
+// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: ret void
+//
+// DEV-LABEL: define dso_local void @_Z12test_defaultPf(
+// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// DEV-NEXT: [[ENTRY:.*:]]
+// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]], !amdgpu.no.remote.memory [[META4]]
+// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local void @_Z12test_defaultPf(
+// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4:![0-9]+]], !amdgpu.ignore.denormal.mode [[META4]]
+// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: ret void
+//
+__device__ __host__ void test_default(float *a) {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+}
+
+// HOST-LABEL: define dso_local void @_Z8test_onePf(
+// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
+// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
+// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
+// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: ret void
+//
+// DEV-LABEL: define dso_local void @_Z8test_onePf(
+// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// DEV-NEXT: [[ENTRY:.*:]]
+// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
+// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local void @_Z8test_onePf(
+// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]]
+// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: ret void
+//
+__device__ __host__ void test_one(float *a) {
+ [[clang::atomic(no_remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// HOST-LABEL: define dso_local void @_Z8test_twoPf(
+// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
+// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
+// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
+// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: ret void
+//
+// DEV-LABEL: define dso_local void @_Z8test_twoPf(
+// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// DEV-NEXT: [[ENTRY:.*:]]
+// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]]
+// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local void @_Z8test_twoPf(
+// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META4]]
+// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: ret void
+//
+__device__ __host__ void test_two(float *a) {
+ [[clang::atomic(remote_memory, ignore_denormal_mode)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// HOST-LABEL: define dso_local void @_Z10test_threePf(
+// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
+// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
+// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
+// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: ret void
+//
+// DEV-LABEL: define dso_local void @_Z10test_threePf(
+// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// DEV-NEXT: [[ENTRY:.*:]]
+// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]]
+// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local void @_Z10test_threePf(
+// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]]
+// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: ret void
+//
+__device__ __host__ void test_three(float *a) {
+ [[clang::atomic(no_remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// HOST-LABEL: define dso_local void @_Z19test_multiple_attrsPf(
+// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
+// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
+// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
+// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: ret void
+//
+// DEV-LABEL: define dso_local void @_Z19test_multiple_attrsPf(
+// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// DEV-NEXT: [[ENTRY:.*:]]
+// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local void @_Z19test_multiple_attrsPf(
+// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META4]]
+// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: ret void
+//
+__device__ __host__ void test_multiple_attrs(float *a) {
+ [[clang::atomic(no_remote_memory)]] [[clang::atomic(remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+// HOST-LABEL: define dso_local void @_Z11test_nestedPf(
+// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
+// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
+// HOST-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4
+// HOST-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4
+// HOST-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4
+// HOST-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4
+// HOST-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4
+// HOST-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4
+// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
+// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
+// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
+// HOST-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// HOST-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1]], align 4
+// HOST-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1]], align 4
+// HOST-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] seq_cst, align 4
+// HOST-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2]], align 4
+// HOST-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2]], align 4
+// HOST-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// HOST-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3]], align 4
+// HOST-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3]], align 4
+// HOST-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] acquire, align 4
+// HOST-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4]], align 4
+// HOST-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4]], align 4
+// HOST-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// HOST-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5]], align 4
+// HOST-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5]], align 4
+// HOST-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] release, align 4
+// HOST-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6]], align 4
+// HOST-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6]], align 4
+// HOST-NEXT: ret void
+//
+// DEV-LABEL: define dso_local void @_Z11test_nestedPf(
+// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// DEV-NEXT: [[ENTRY:.*:]]
+// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5)
+// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// DEV-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// DEV-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// DEV-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
+// DEV-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr
+// DEV-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr
+// DEV-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr
+// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
+// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// DEV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// DEV-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// DEV-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4
+// DEV-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// DEV-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// DEV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4
+// DEV-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4
+// DEV-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup") acquire, align 4, !amdgpu.no.remote.memory [[META4]]
+// DEV-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4
+// DEV-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4
+// DEV-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// DEV-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4
+// DEV-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4
+// DEV-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// DEV-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4
+// DEV-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4
+// DEV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local void @_Z11test_nestedPf(
+// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5)
+// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// OPT-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// OPT-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// OPT-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
+// OPT-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr
+// OPT-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr
+// OPT-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr
+// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]]
+// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// OPT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// OPT-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// OPT-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4
+// OPT-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// OPT-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// OPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4
+// OPT-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4
+// OPT-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup") acquire, align 4, !amdgpu.no.remote.memory [[META4]]
+// OPT-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4
+// OPT-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4
+// OPT-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// OPT-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4
+// OPT-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4
+// OPT-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// OPT-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4
+// OPT-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4
+// OPT-NEXT: ret void
+//
+__device__ __host__ void test_nested(float *a) {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ {
+ [[clang::atomic(remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] {
+ __scoped_atomic_fetch_max(a, 2, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
+ {
+ [[clang::atomic(no_remote_memory)]] {
+ __scoped_atomic_fetch_min(a, 3, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP);
+ }
+ }
+ {
+ [[clang::atomic(no_fine_grained_memory)]] {
+ __scoped_atomic_fetch_sub(a, 4, __ATOMIC_RELEASE, __MEMORY_SCOPE_WVFRNT);
+ }
+ }
+ }
+ }
+}
+
+//
+//
+//
+//
+template<typename T> __device__ __host__ void test_template(T *a) {
+ [[clang::atomic(no_remote_memory, fine_grained_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+template __device__ __host__ void test_template<float>(float *a);
+
+//.
+// DEV: [[META4]] = !{}
+//.
+// OPT: [[META4]] = !{}
+//.
diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl
index 1d85026..214b3a4 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -70,19 +70,19 @@ void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *
void fi3(atomic_int *i, atomic_uint *ui) {
// CHECK-LABEL: @fi3
- // CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE:![0-9]+]]{{$}}
+ // CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE:![0-9]+]], [[$DEFMD:!amdgpu.no.fine.grained.memory ![0-9]+, !amdgpu.no.remote.memory ![0-9]+$]]
int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}}
+ // CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]], [[$DEFMD]]
x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}}
+ // CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]], [[$DEFMD]]
x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}}
+ // CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]], [[$DEFMD]]
x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}}
+ // CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]], [[$DEFMD]]
x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}
@@ -186,31 +186,31 @@ void ff2(atomic_float *d) {
float ff3(atomic_float *d) {
// CHECK-LABEL: @ff3
- // CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}}
+ // CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]], [[$DEFMD]]
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}
float ff4(global atomic_float *d, float a) {
// CHECK-LABEL: @ff4
- // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}}
+ // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 4, [[$DEFMD]]
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
float ff5(global atomic_double *d, double a) {
// CHECK-LABEL: @ff5
- // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}
+ // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 8, [[$DEFMD]]
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
float ff4_generic(atomic_float *d, float a) {
// CHECK-LABEL: @ff4_generic
- // CHECK: atomicrmw fadd ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}}
+ // CHECK: atomicrmw fadd ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !noalias.addrspace [[$NOPRIVATE]], [[$DEFMD]]
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
float ff5_generic(atomic_double *d, double a) {
// CHECK-LABEL: @ff5_generic
- // CHECK: atomicrmw fadd ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace [[$NOPRIVATE]]{{$}}
+ // CHECK: atomicrmw fadd ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace [[$NOPRIVATE]], [[$DEFMD]]
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
diff --git a/clang/test/Driver/atomic-options.hip b/clang/test/Driver/atomic-options.hip
new file mode 100644
index 0000000..e44dac3
--- /dev/null
+++ b/clang/test/Driver/atomic-options.hip
@@ -0,0 +1,6 @@
+// RUN: %clang -### -nogpulib -nogpuinc %s \
+// RUN: -fatomic-fine-grained-memory -fno-atomic-remote-memory -fatomic-ignore-denormal-mode \
+// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-VALID
+
+// CHECK-VALID: "-cc1" {{.*}}"-triple" "amdgcn-amd-amdhsa" {{.*}}"-fatomic-fine-grained-memory" "-fatomic-ignore-denormal-mode"
+// CHECK-VALID: "-cc1" {{.*}}"-triple" {{.*}}"-fatomic-fine-grained-memory" "-fatomic-ignore-denormal-mode"
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
index 8c13137..0aabc8a 100644
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -54,11 +54,11 @@
// RUN: %clang -### -nogpuinc -nogpulib -munsafe-fp-atomics \
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
-// UNSAFE-FP-ATOMICS: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics"
+// UNSAFE-FP-ATOMICS: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fatomic-ignore-denormal-mode"
// RUN: %clang -### -nogpuinc -nogpulib \
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEFAULT-UNSAFE-FP-ATOMICS %s
-// DEFAULT-UNSAFE-FP-ATOMICS-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics"
+// DEFAULT-UNSAFE-FP-ATOMICS-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fatomic-ignore-denormal-mode"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-exclude-wrong-side-overloads \
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=FIX-OVERLOAD %s
diff --git a/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp b/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp
index 7a34113..60d7cb0 100644
--- a/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp
+++ b/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp
@@ -11,7 +11,7 @@ double dv, dx;
// DEFAULT-SAME: ) #[[ATTR0:[0-9]+]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
-// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4
+// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]], !amdgpu.no.remote.memory [[META5]]
// DEFAULT-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]]
// DEFAULT-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// DEFAULT-NEXT: ret void
@@ -20,7 +20,7 @@ double dv, dx;
// UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0:[0-9]+]] {
// UNSAFE-FP-ATOMICS-NEXT: [[ENTRY:.*:]]
// UNSAFE-FP-ATOMICS-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
-// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]], !amdgpu.ignore.denormal.mode [[META5]]
+// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]], !amdgpu.no.remote.memory [[META5]], !amdgpu.ignore.denormal.mode [[META5]]
// UNSAFE-FP-ATOMICS-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]]
// UNSAFE-FP-ATOMICS-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// UNSAFE-FP-ATOMICS-NEXT: ret void
@@ -34,7 +34,7 @@ void atomic_fadd_f32() {
// DEFAULT-SAME: ) #[[ATTR0]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
-// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8
+// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]], !amdgpu.no.remote.memory [[META5]]
// DEFAULT-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]]
// DEFAULT-NEXT: store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// DEFAULT-NEXT: ret void
@@ -43,7 +43,7 @@ void atomic_fadd_f32() {
// UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0]] {
// UNSAFE-FP-ATOMICS-NEXT: [[ENTRY:.*:]]
// UNSAFE-FP-ATOMICS-NEXT: [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
-// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]], !amdgpu.no.remote.memory [[META5]]
// UNSAFE-FP-ATOMICS-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]]
// UNSAFE-FP-ATOMICS-NEXT: store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// UNSAFE-FP-ATOMICS-NEXT: ret void
@@ -55,5 +55,7 @@ void atomic_fadd_f64() {
#pragma omp end declare target
//.
+// DEFAULT: [[META5]] = !{}
+//.
// UNSAFE-FP-ATOMICS: [[META5]] = !{}
//.
diff --git a/clang/test/Parser/Inputs/cuda.h b/clang/test/Parser/Inputs/cuda.h
new file mode 100644
index 0000000..405ef8b
--- /dev/null
+++ b/clang/test/Parser/Inputs/cuda.h
@@ -0,0 +1,54 @@
+/* Minimal declarations for CUDA support. Testing purposes only. */
+
+#include <stddef.h>
+
+// Make this file work with nvcc, for testing compatibility.
+
+#ifndef __NVCC__
+#define __constant__ __attribute__((constant))
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __host__ __attribute__((host))
+#define __shared__ __attribute__((shared))
+#define __managed__ __attribute__((managed))
+#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+
+struct dim3 {
+ unsigned x, y, z;
+ __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
+};
+
+#ifdef __HIP__
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
+int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#else
+typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
+
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
+#endif
+
+// Host- and device-side placement new overloads.
+void *operator new(__SIZE_TYPE__, void *p) { return p; }
+void *operator new[](__SIZE_TYPE__, void *p) { return p; }
+__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; }
+__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; }
+
+#endif // !__NVCC__
diff --git a/clang/test/Parser/atomic-options.hip b/clang/test/Parser/atomic-options.hip
new file mode 100644
index 0000000..4deb967
--- /dev/null
+++ b/clang/test/Parser/atomic-options.hip
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify -fcuda-is-device %s
+// RUN: %clang_cc1 -fsyntax-only -verify -fcuda-is-device %s \
+// RUN: -fatomic-fine-grained-memory -fatomic-ignore-denormal-mode
+
+#include "Inputs/cuda.h"
+
+#if !__has_extension(clang_atomic_attributes)
+#error "We should have atomic attributes support"
+#endif
+
+[[clang::atomic(!no_remote_memory)]] // expected-error {{use of undeclared identifier 'no_remote_memory'}}
+__device__ __host__ void test_location(float *a) {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ [[clang::atomic(!no_remote_memory)]] int x; // expected-error {{use of undeclared identifier 'no_remote_memory'}}
+}
+
+__device__ __host__ void test_invalid_option(float *a) {
+ [[clang::atomic(fast)]] { // expected-error {{invalid argument 'fast' to atomic attribute; valid options are: 'remote_memory', 'fine_grained_memory', 'ignore_denormal_mode' (optionally prefixed with 'no_')}}
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+__device__ __host__ void test_invalid_value(float *a) {
+ [[clang::atomic(no_remote_memory(default))]] { // expected-error2 {{expected ','}} expected-error {{expected ')'}}
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+__device__ __host__ void test_invalid_format(float *a) {
+ [[clang::atomic(no_remote_memory=on)]] { // expected-error2 {{expected ','}} expected-error {{expected ')'}}
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+[[clang::atomic(no_remote_memory)]] // expected-error {{'atomic' attribute cannot be applied to a declaration}}
+__device__ __host__ void test_not_compound_stmt(float *a) {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+}
+
+__device__ __host__ void test_quoted(float *a) {
+ [[clang::atomic("no_remote_memory", "remote_memory")]] { // expected-error {{'atomic' attribute requires an identifier}}
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+__device__ __host__ void test_one_value(float *a) {
+ [[clang::atomic(no_remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+__device__ __host__ void test_multiple_value(float *a) {
+ [[clang::atomic(no_remote_memory, fine_grained_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+__device__ __host__ void test_duplicate_value(float *a) {
+ [[clang::atomic(no_remote_memory, no_remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+__device__ __host__ void test_conflict_value(float *a) {
+ [[clang::atomic(no_remote_memory, remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}
+
+__device__ __host__ void test_multiple_attrs(float *a) {
+ [[clang::atomic(no_remote_memory)]] [[clang::atomic(remote_memory)]] {
+ __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+ }
+}