diff options
Diffstat (limited to 'llvm')
178 files changed, 19208 insertions, 1084 deletions
diff --git a/llvm/docs/CommandGuide/llvm-dwarfdump.rst b/llvm/docs/CommandGuide/llvm-dwarfdump.rst index 27ad4226..1378302 100644 --- a/llvm/docs/CommandGuide/llvm-dwarfdump.rst +++ b/llvm/docs/CommandGuide/llvm-dwarfdump.rst @@ -83,7 +83,7 @@ OPTIONS .. option:: -n <name>, --name=<name> Find and print all debug info entries whose name - (`DW_AT_name` attribute) is <name>. + (`DW_AT_name`/`DW_AT_linkage_name` attribute) is <name>. .. option:: --lookup=<address> diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst index b94e79e..45f2df2 100644 --- a/llvm/docs/DeveloperPolicy.rst +++ b/llvm/docs/DeveloperPolicy.rst @@ -413,6 +413,10 @@ Below are some guidelines about the format of the message itself: message self-explanatory. Note that such non-public links should not be included in the submitted code. +* Avoid 'tagging' someone's username in your commits and PR descriptions + (e.g., `@<someUser>`), doing so results in that account receiving a notification + every time the commit is cherry-picked and/or pushed to a fork. + LLVM uses a squash workflow for pull requests, so as the pull request evolves during review, it's important to update the pull request description over the course of a review. GitHub uses the initial commit message to create the pull diff --git a/llvm/docs/QualGroup.rst b/llvm/docs/QualGroup.rst index 5c05e4e..0e73ec5 100644 --- a/llvm/docs/QualGroup.rst +++ b/llvm/docs/QualGroup.rst @@ -181,6 +181,46 @@ Membership Review To ensure the group remains active and focused, member participation will be reviewed every six months. Inactive members may be removed following this review. +Decision Taking +--------------- + +The LLVM Qualification Working Group aims to make decisions transparently, collaboratively, and without unnecessary formality. The goal is to maintain efficiency while encouraging broad participation and mutual understanding. 
+ +This section describes the lightweight process used to handle proposals and decisions within the group. It may be revised as the group evolves and gains experience. + +Principles +^^^^^^^^^^ + +* **Consensus first:** The preferred mode of decision-making is consensus through open discussion (primarily on Discord or during sync-up meetings). +* **Inclusiveness and respect:** All viewpoints are encouraged, and members are expected to contribute constructively toward reaching a shared understanding. +* **Transparency:** Discussions leading to a decision should be visible to the group and, whenever appropriate, summarized in public channels (e.g., Discourse meeting notes, Discord channel, documentation updates). + +Consensus and Time Limits +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Discussions remain open until a clear consensus emerges, meaning no sustained objections have been raised after reasonable discussion. + +To prevent open-ended debates, if no new viewpoints are expressed after an agreed period (e.g., 2 weeks), the moderator (typically the person who started the discussion thread) may take one of the following actions: + +* **Summarize the apparent consensus** and close the discussion, or +* **Postpone the topic** to the next sync-up meeting if the outcome remains unclear, or +* **Call for a short vote** to confirm the group’s position. + +Voting Procedure +^^^^^^^^^^^^^^^^ + +When consensus cannot be reached or when a clear yes/no decision is needed: + +* The moderator may call for a **simple vote** using emoji reactions on Discord or a similar visible method. +* A decision passes if it receives a **majority (>50%)** of votes among **participants who voted.** Non-votes are **not counted** in the total. +* To ensure decisions reflect the collective position of the group, **at least three-quarters of the total core members** must participate in the vote for it to be considered valid. 
+* If results are evenly split **(50/50)**, or if participation falls below this threshold, the topic may be postponed to the next sync-up meeting for further discussion. + +Documentation +^^^^^^^^^^^^^ + +Final decisions should be briefly documented (e.g., in meeting minutes, the corresponding GitHub issue, or Discord discussion thread). Once stable, the resulting policy or outcome may be reflected in this documentation for reference. + Current Topics & Backlog ======================== @@ -205,10 +245,11 @@ Slides used to support discussions during sync-up meetings are stored in LLVM's Available slides: +* (add future entries here) +* `October 2025 <https://docs.google.com/presentation/d/1ND2SkjgcHvcEbQmMd8ExL-PpRXouP49T-wfy3xf2yRQ/edit?usp=sharing>`_ * `September 2025 <https://docs.google.com/presentation/d/1SZAE-QHfJED6CxJCCtBkPDxcw7XU9ORX54TJyXe1ppc/edit?usp=sharing>`_ * `August 2025 <https://docs.google.com/presentation/d/1K8GWoRm8ZAeyyGvTeV5f-sMOhMr7WHiEk6_Nm5Fk10o/edit?usp=sharing>`_ * `July 2025 <https://docs.google.com/presentation/d/1ktURe9qz5ggbdOQYK-2ISpiC18B-Y_35WvGyAnnxEpw/edit?usp=sharing>`_ -* (add future entries here) AI Transcription Policy ======================= diff --git a/llvm/include/llvm/ADT/Twine.h b/llvm/include/llvm/ADT/Twine.h index d9f9c0f..e3b4d5e 100644 --- a/llvm/include/llvm/ADT/Twine.h +++ b/llvm/include/llvm/ADT/Twine.h @@ -285,7 +285,7 @@ public: } /// Construct from a StringRef. - /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) { + /*implicit*/ Twine(StringRef Str) : LHSKind(PtrAndLengthKind) { LHS.ptrAndLength.ptr = Str.data(); LHS.ptrAndLength.length = Str.size(); assert(isValid() && "Invalid twine!"); @@ -352,7 +352,7 @@ public: // right thing. Yet. /// Construct as the concatenation of a C string and a StringRef. 
- /*implicit*/ Twine(const char *LHS, const StringRef &RHS) + /*implicit*/ Twine(const char *LHS, StringRef RHS) : LHSKind(CStringKind), RHSKind(PtrAndLengthKind) { this->LHS.cString = LHS; this->RHS.ptrAndLength.ptr = RHS.data(); @@ -361,7 +361,7 @@ public: } /// Construct as the concatenation of a StringRef and a C string. - /*implicit*/ Twine(const StringRef &LHS, const char *RHS) + /*implicit*/ Twine(StringRef LHS, const char *RHS) : LHSKind(PtrAndLengthKind), RHSKind(CStringKind) { this->LHS.ptrAndLength.ptr = LHS.data(); this->LHS.ptrAndLength.length = LHS.size(); @@ -530,14 +530,14 @@ inline Twine operator+(const Twine &LHS, const Twine &RHS) { /// Additional overload to guarantee simplified codegen; this is equivalent to /// concat(). -inline Twine operator+(const char *LHS, const StringRef &RHS) { +inline Twine operator+(const char *LHS, StringRef RHS) { return Twine(LHS, RHS); } /// Additional overload to guarantee simplified codegen; this is equivalent to /// concat(). -inline Twine operator+(const StringRef &LHS, const char *RHS) { +inline Twine operator+(StringRef LHS, const char *RHS) { return Twine(LHS, RHS); } diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index 39c5a8d..af66f2d 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -58,8 +58,6 @@ LLVM_ABI CodeGenFileType getFileType(); LLVM_ABI FramePointerKind getFramePointerUsage(); -LLVM_ABI bool getEnableUnsafeFPMath(); - LLVM_ABI bool getEnableNoInfsFPMath(); LLVM_ABI bool getEnableNoNaNsFPMath(); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index b7ccfbb..8db99ba 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -210,8 +210,8 @@ struct SpecificConstantMatch { }; /// Matches a constant equal to \p RequestedValue. 
-inline SpecificConstantMatch m_SpecificICst(APInt RequestedValue) { - return SpecificConstantMatch(std::move(RequestedValue)); +inline SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue) { + return SpecificConstantMatch(RequestedValue); } inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) { @@ -221,7 +221,7 @@ inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) { /// Matcher for a specific constant splat. struct SpecificConstantSplatMatch { APInt RequestedVal; - SpecificConstantSplatMatch(const APInt RequestedVal) + SpecificConstantSplatMatch(const APInt &RequestedVal) : RequestedVal(RequestedVal) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { return isBuildVectorConstantSplat(Reg, MRI, RequestedVal, @@ -230,8 +230,9 @@ struct SpecificConstantSplatMatch { }; /// Matches a constant splat of \p RequestedValue. -inline SpecificConstantSplatMatch m_SpecificICstSplat(APInt RequestedValue) { - return SpecificConstantSplatMatch(std::move(RequestedValue)); +inline SpecificConstantSplatMatch +m_SpecificICstSplat(const APInt &RequestedValue) { + return SpecificConstantSplatMatch(RequestedValue); } inline SpecificConstantSplatMatch m_SpecificICstSplat(int64_t RequestedValue) { @@ -242,7 +243,7 @@ inline SpecificConstantSplatMatch m_SpecificICstSplat(int64_t RequestedValue) { /// Matcher for a specific constant or constant splat. struct SpecificConstantOrSplatMatch { APInt RequestedVal; - SpecificConstantOrSplatMatch(const APInt RequestedVal) + SpecificConstantOrSplatMatch(const APInt &RequestedVal) : RequestedVal(RequestedVal) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { APInt MatchedVal; @@ -263,8 +264,8 @@ struct SpecificConstantOrSplatMatch { /// Matches a \p RequestedValue constant or a constant splat of \p /// RequestedValue. 
inline SpecificConstantOrSplatMatch -m_SpecificICstOrSplat(APInt RequestedValue) { - return SpecificConstantOrSplatMatch(std::move(RequestedValue)); +m_SpecificICstOrSplat(const APInt &RequestedValue) { + return SpecificConstantOrSplatMatch(RequestedValue); } inline SpecificConstantOrSplatMatch diff --git a/llvm/include/llvm/CodeGen/MIR2Vec.h b/llvm/include/llvm/CodeGen/MIR2Vec.h index 7b1b5d9..f6b0571 100644 --- a/llvm/include/llvm/CodeGen/MIR2Vec.h +++ b/llvm/include/llvm/CodeGen/MIR2Vec.h @@ -52,11 +52,21 @@ class LLVMContext; class MIR2VecVocabLegacyAnalysis; class TargetInstrInfo; +enum class MIR2VecKind { Symbolic }; + namespace mir2vec { + +// Forward declarations +class MIREmbedder; +class SymbolicMIREmbedder; + extern llvm::cl::OptionCategory MIR2VecCategory; extern cl::opt<float> OpcWeight; using Embedding = ir2vec::Embedding; +using MachineInstEmbeddingsMap = DenseMap<const MachineInstr *, Embedding>; +using MachineBlockEmbeddingsMap = + DenseMap<const MachineBasicBlock *, Embedding>; /// Class for storing and accessing the MIR2Vec vocabulary. /// The MIRVocabulary class manages seed embeddings for LLVM Machine IR @@ -107,19 +117,91 @@ public: const_iterator end() const { return Storage.end(); } - /// Total number of entries in the vocabulary - size_t getCanonicalSize() const { return Storage.size(); } - MIRVocabulary() = delete; /// Factory method to create MIRVocabulary from vocabulary map static Expected<MIRVocabulary> create(VocabMap &&Entries, const TargetInstrInfo &TII); + /// Create a dummy vocabulary for testing purposes. 
+ static Expected<MIRVocabulary> + createDummyVocabForTest(const TargetInstrInfo &TII, unsigned Dim = 1); + + /// Total number of entries in the vocabulary + size_t getCanonicalSize() const { return Storage.size(); } + private: MIRVocabulary(VocabMap &&Entries, const TargetInstrInfo &TII); }; +/// Base class for MIR embedders +class MIREmbedder { +protected: + const MachineFunction &MF; + const MIRVocabulary &Vocab; + + /// Dimension of the embeddings; Captured from the vocabulary + const unsigned Dimension; + + /// Weight for opcode embeddings + const float OpcWeight; + + MIREmbedder(const MachineFunction &MF, const MIRVocabulary &Vocab) + : MF(MF), Vocab(Vocab), Dimension(Vocab.getDimension()), + OpcWeight(mir2vec::OpcWeight) {} + + /// Function to compute embeddings. + Embedding computeEmbeddings() const; + + /// Function to compute the embedding for a given machine basic block. + Embedding computeEmbeddings(const MachineBasicBlock &MBB) const; + + /// Function to compute the embedding for a given machine instruction. + /// Specific to the kind of embeddings being computed. + virtual Embedding computeEmbeddings(const MachineInstr &MI) const = 0; + +public: + virtual ~MIREmbedder() = default; + + /// Factory method to create an Embedder object of the specified kind + /// Returns nullptr if the requested kind is not supported. + static std::unique_ptr<MIREmbedder> create(MIR2VecKind Mode, + const MachineFunction &MF, + const MIRVocabulary &Vocab); + + /// Computes and returns the embedding for a given machine instruction MI in + /// the machine function MF. + Embedding getMInstVector(const MachineInstr &MI) const { + return computeEmbeddings(MI); + } + + /// Computes and returns the embedding for a given machine basic block in the + /// machine function MF. + Embedding getMBBVector(const MachineBasicBlock &MBB) const { + return computeEmbeddings(MBB); + } + + /// Computes and returns the embedding for the current machine function. 
+ Embedding getMFunctionVector() const { + // Currently, we always (re)compute the embeddings for the function. This is + // cheaper than caching the vector. + return computeEmbeddings(); + } +}; + +/// Class for computing Symbolic embeddings +/// Symbolic embeddings are constructed based on the entity-level +/// representations obtained from the MIR Vocabulary. +class SymbolicMIREmbedder : public MIREmbedder { +private: + Embedding computeEmbeddings(const MachineInstr &MI) const override; + +public: + SymbolicMIREmbedder(const MachineFunction &F, const MIRVocabulary &Vocab); + static std::unique_ptr<SymbolicMIREmbedder> + create(const MachineFunction &MF, const MIRVocabulary &Vocab); +}; + } // namespace mir2vec /// Pass to analyze and populate MIR2Vec vocabulary from a module @@ -166,6 +248,31 @@ public: } }; +/// This pass prints the MIR2Vec embeddings for machine functions, basic blocks, +/// and instructions +class MIR2VecPrinterLegacyPass : public MachineFunctionPass { + raw_ostream &OS; + +public: + static char ID; + explicit MIR2VecPrinterLegacyPass(raw_ostream &OS) + : MachineFunctionPass(ID), OS(OS) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MIR2VecVocabLegacyAnalysis>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { + return "MIR2Vec Embedder Printer Pass"; + } +}; + +/// Create a machine pass that prints MIR2Vec embeddings +MachineFunctionPass *createMIR2VecPrinterLegacyPass(raw_ostream &OS); + } // namespace llvm #endif // LLVM_CODEGEN_MIR2VEC_H
\ No newline at end of file diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 272b4ac..7fae550 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -93,6 +93,10 @@ createMachineFunctionPrinterPass(raw_ostream &OS, LLVM_ABI MachineFunctionPass * createMIR2VecVocabPrinterLegacyPass(raw_ostream &OS); +/// MIR2VecPrinter pass - This pass prints out the MIR2Vec embeddings for +/// machine functions, basic blocks and instructions. +LLVM_ABI MachineFunctionPass *createMIR2VecPrinterLegacyPass(raw_ostream &OS); + /// StackFramePrinter pass - This pass prints out the machine function's /// stack frame to the given stream as a debugging tool. LLVM_ABI MachineFunctionPass *createStackFrameLayoutAnalysisPass(); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 73f2c55..64a7563 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2459,6 +2459,12 @@ public: return ISD::ANY_EXTEND; } + /// Returns how the platform's atomic rmw operations expect their input + /// argument to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). 
+ virtual ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const { + return ISD::ANY_EXTEND; + } + /// @} /// Returns true if we should normalize diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 8e7d9dc..8ce2b1b 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -410,7 +410,6 @@ def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">; def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">; def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">; def NoSignedZerosFPMath : StrBoolAttr<"no-signed-zeros-fp-math">; -def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">; def NoJumpTables : StrBoolAttr<"no-jump-tables">; def NoInlineLineTables : StrBoolAttr<"no-inline-line-tables">; def ProfileSampleAccurate : StrBoolAttr<"profile-sample-accurate">; @@ -474,7 +473,6 @@ def : MergeRule<"setAND<LessPreciseFPMADAttr>">; def : MergeRule<"setAND<NoInfsFPMathAttr>">; def : MergeRule<"setAND<NoNansFPMathAttr>">; def : MergeRule<"setAND<NoSignedZerosFPMathAttr>">; -def : MergeRule<"setAND<UnsafeFPMathAttr>">; def : MergeRule<"setOR<NoImplicitFloatAttr>">; def : MergeRule<"setOR<NoJumpTablesAttr>">; def : MergeRule<"setOR<ProfileSampleAccurateAttr>">; diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h index 31096e8..540d60a 100644 --- a/llvm/include/llvm/IR/AutoUpgrade.h +++ b/llvm/include/llvm/IR/AutoUpgrade.h @@ -96,6 +96,16 @@ namespace llvm { /// info. Return true if module is modified. LLVM_ABI bool UpgradeDebugInfo(Module &M); + /// Copies module attributes to the functions in the module. + /// Currently only effects ARM, Thumb and AArch64 targets. + /// Supported attributes: + /// - branch-target-enforcement + /// - branch-protection-pauth-lr + /// - guarded-control-stack + /// - sign-return-address + /// - sign-return-address-with-bkey + void copyModuleAttrToFunctions(Module &M); + /// Check whether a string looks like an old loop attachment tag. 
inline bool mayBeOldLoopAttachmentTag(StringRef Name) { return Name.starts_with("llvm.vectorizer."); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 12d1c25..e6cce9a4 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2851,7 +2851,15 @@ def int_ptrauth_blend : def int_ptrauth_sign_generic : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; +//===----------------- AllocToken Intrinsics ------------------------------===// + +// Return the token ID for the given !alloc_token metadata. +def int_alloc_token_id : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_metadata_ty], + [IntrNoMem, NoUndef<RetIndex>]>; + //===----------------------------------------------------------------------===// + //===------- Convergence Intrinsics ---------------------------------------===// def int_experimental_convergence_entry diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index ada3523..0135989 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -236,7 +236,7 @@ private: static bool hasAEABILibcalls(const Triple &TT) { return TT.isTargetAEABI() || TT.isTargetGNUAEABI() || - TT.isTargetMuslAEABI() || TT.isAndroid(); + TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isAndroid(); } LLVM_READONLY diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index cd774e7..d507ba2 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -222,6 +222,7 @@ LLVM_ABI void initializeMachineSanitizerBinaryMetadataLegacyPass(PassRegistry &); LLVM_ABI void initializeMIR2VecVocabLegacyAnalysisPass(PassRegistry &); LLVM_ABI void initializeMIR2VecVocabPrinterLegacyPassPass(PassRegistry &); +LLVM_ABI void initializeMIR2VecPrinterLegacyPassPass(PassRegistry &); LLVM_ABI void initializeMachineSchedulerLegacyPass(PassRegistry &); LLVM_ABI 
void initializeMachineSinkingLegacyPass(PassRegistry &); LLVM_ABI void initializeMachineTraceMetricsWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Support/AllocToken.h b/llvm/include/llvm/Support/AllocToken.h new file mode 100644 index 0000000..8d82670 --- /dev/null +++ b/llvm/include/llvm/Support/AllocToken.h @@ -0,0 +1,58 @@ +//===- llvm/Support/AllocToken.h - Allocation Token Calculation -----*- C++ -*// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definition of AllocToken modes and shared calculation of stateless token IDs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_ALLOCTOKEN_H +#define LLVM_SUPPORT_ALLOCTOKEN_H + +#include "llvm/ADT/SmallString.h" +#include <cstdint> +#include <optional> + +namespace llvm { + +/// Modes for generating allocation token IDs. +enum class AllocTokenMode { + /// Incrementally increasing token ID. + Increment, + + /// Simple mode that returns a statically-assigned random token ID. + Random, + + /// Token ID based on allocated type hash. + TypeHash, + + /// Token ID based on allocated type hash, where the top half ID-space is + /// reserved for types that contain pointers and the bottom half for types + /// that do not contain pointers. + TypeHashPointerSplit, +}; + +/// Metadata about an allocation used to generate a token ID. +struct AllocTokenMetadata { + SmallString<64> TypeName; + bool ContainsPointer; +}; + +/// Calculates stable allocation token ID. Returns std::nullopt for stateful +/// modes that are only available in the AllocToken pass. +/// +/// \param Mode The token generation mode. +/// \param Metadata The metadata about the allocation. 
+/// \param MaxTokens The maximum number of tokens (must not be 0) +/// \return The calculated allocation token ID, or std::nullopt. +LLVM_ABI std::optional<uint64_t> +getAllocToken(AllocTokenMode Mode, const AllocTokenMetadata &Metadata, + uint64_t MaxTokens); + +} // end namespace llvm + +#endif // LLVM_SUPPORT_ALLOCTOKEN_H diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 2c2122a..bfd2817 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -118,9 +118,8 @@ enum CodeObjectVersionKind { class TargetOptions { public: TargetOptions() - : UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false), - NoTrappingFPMath(true), NoSignedZerosFPMath(false), - EnableAIXExtendedAltivecABI(false), + : NoInfsFPMath(false), NoNaNsFPMath(false), NoTrappingFPMath(true), + NoSignedZerosFPMath(false), EnableAIXExtendedAltivecABI(false), HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), @@ -156,13 +155,6 @@ public: /// MCAsmInfo::BinutilsVersion. std::pair<int, int> BinutilsVersion{0, 0}; - /// UnsafeFPMath - This flag is enabled when the - /// -enable-unsafe-fp-math flag is specified on the command line. When - /// this flag is off (the default), the code generator is not allowed to - /// produce results that are "less precise" than IEEE allows. This includes - /// use of X86 instructions like FSIN and FCOS instead of libcalls. - unsigned UnsafeFPMath : 1; - /// NoInfsFPMath - This flag is enabled when the /// -enable-no-infs-fp-math flag is specified on the command line. 
When /// this flag is off (the default), the code generator is not allowed to diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index dc8cd86d..5e43444 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -935,7 +935,8 @@ public: getEnvironment() == Triple::GNUEABIHF || getEnvironment() == Triple::GNUEABIHFT64 || getEnvironment() == Triple::OpenHOS || - getEnvironment() == Triple::MuslEABIHF || isAndroid()) && + getEnvironment() == Triple::MuslEABIHF || isOSFuchsia() || + isAndroid()) && isOSBinFormatELF() && !isOSNetBSD(); } diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 8da51d0..b573023 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4866,6 +4866,89 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F, return nullptr; } +/// Look for the following pattern and simplify %to_fold to %identicalPhi. +/// Here %phi, %to_fold and %phi.next perform the same functionality as +/// %identicalPhi and hence the select instruction %to_fold can be folded +/// into %identicalPhi. +/// +/// BB1: +/// %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ] +/// %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ] +/// ... +/// %identicalPhi.next = select %cmp, %val, %identicalPhi +/// (or select %cmp, %identicalPhi, %val) +/// %to_fold = select %cmp2, %identicalPhi, %phi +/// %phi.next = select %cmp, %val, %to_fold +/// (or select %cmp, %to_fold, %val) +/// +/// Prove that %phi and %identicalPhi are the same by induction: +/// +/// Base case: Both %phi and %identicalPhi are equal on entry to the loop. +/// Inductive case: +/// Suppose %phi and %identicalPhi are equal at iteration i. +/// We look at their values at iteration i+1 which are %phi.next and +/// %identicalPhi.next. 
They would have become different only when %cmp is +/// false and the corresponding values %to_fold and %identicalPhi differ +/// (similar reason for the other "or" case in the bracket). +/// +/// The only condition when %to_fold and %identicalPh could differ is when %cmp2 +/// is false and %to_fold is %phi, which contradicts our inductive hypothesis +/// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are +/// always equal at iteration i+1. +bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) { + if (PN.getParent() != IdenticalPN.getParent()) + return false; + if (PN.getNumIncomingValues() != 2) + return false; + + // Check that only the backedge incoming value is different. + unsigned DiffVals = 0; + BasicBlock *DiffValBB = nullptr; + for (unsigned i = 0; i < 2; i++) { + BasicBlock *PredBB = PN.getIncomingBlock(i); + if (PN.getIncomingValueForBlock(PredBB) != + IdenticalPN.getIncomingValueForBlock(PredBB)) { + DiffVals++; + DiffValBB = PredBB; + } + } + if (DiffVals != 1) + return false; + // Now check that the backedge incoming values are two select + // instructions with the same condition. Either their true + // values are the same, or their false values are the same. 
+ auto *SI = dyn_cast<SelectInst>(PN.getIncomingValueForBlock(DiffValBB)); + auto *IdenticalSI = + dyn_cast<SelectInst>(IdenticalPN.getIncomingValueForBlock(DiffValBB)); + if (!SI || !IdenticalSI) + return false; + if (SI->getCondition() != IdenticalSI->getCondition()) + return false; + + SelectInst *SIOtherVal = nullptr; + Value *IdenticalSIOtherVal = nullptr; + if (SI->getTrueValue() == IdenticalSI->getTrueValue()) { + SIOtherVal = dyn_cast<SelectInst>(SI->getFalseValue()); + IdenticalSIOtherVal = IdenticalSI->getFalseValue(); + } else if (SI->getFalseValue() == IdenticalSI->getFalseValue()) { + SIOtherVal = dyn_cast<SelectInst>(SI->getTrueValue()); + IdenticalSIOtherVal = IdenticalSI->getTrueValue(); + } else { + return false; + } + + // Now check that the other values in select, i.e., %to_fold and + // %identicalPhi, are essentially the same value. + if (!SIOtherVal || IdenticalSIOtherVal != &IdenticalPN) + return false; + if (!(SIOtherVal->getTrueValue() == &IdenticalPN && + SIOtherVal->getFalseValue() == &PN) && + !(SIOtherVal->getTrueValue() == &PN && + SIOtherVal->getFalseValue() == &IdenticalPN)) + return false; + return true; +} + /// Given operands for a SelectInst, see if we can fold the result. /// If not, this returns null. static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, @@ -5041,7 +5124,14 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, std::optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL); if (Imp) return *Imp ? TrueVal : FalseVal; - + // Look for same PHIs in the true and false values. 
+ if (auto *TruePHI = dyn_cast<PHINode>(TrueVal)) + if (auto *FalsePHI = dyn_cast<PHINode>(FalseVal)) { + if (isSimplifierIdenticalPHI(*TruePHI, *FalsePHI)) + return FalseVal; + if (isSimplifierIdenticalPHI(*FalsePHI, *TruePHI)) + return TrueVal; + } return nullptr; } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index cf63285..f71a534 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -451,6 +451,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { UpgradeModuleFlags(*M); UpgradeNVVMAnnotations(*M); UpgradeSectionAttributes(*M); + copyModuleAttrToFunctions(*M); if (!Slots) return false; diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index aaee1f0..cf7efbfa 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -7143,6 +7143,8 @@ Error BitcodeReader::materializeModule() { UpgradeARCRuntime(*TheModule); + copyModuleAttrToFunctions(*TheModule); + return Error::success(); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index e2af0c5..fefde64f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1438,6 +1438,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, BBFreqEnabled, BrProbEnabled, MF.hasBBSections() && NumMBBSectionRanges > 1, + // Use static_cast to avoid breakage of tests on windows. 
static_cast<bool>(BBAddrMapSkipEmitBBEntries), HasCalls, false}; diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index c438eae..9795a0b 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -98,6 +98,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineUniformityAnalysisPassPass(Registry); initializeMIR2VecVocabLegacyAnalysisPass(Registry); initializeMIR2VecVocabPrinterLegacyPassPass(Registry); + initializeMIR2VecPrinterLegacyPassPass(Registry); initializeMachineUniformityInfoPrinterPassPass(Registry); initializeMachineVerifierLegacyPassPass(Registry); initializeObjCARCContractLegacyPassPass(Registry); diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 0522698..c1365f4 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -64,7 +64,6 @@ CGOPT_EXP(uint64_t, LargeDataThreshold) CGOPT(ExceptionHandling, ExceptionModel) CGOPT_EXP(CodeGenFileType, FileType) CGOPT(FramePointerKind, FramePointerUsage) -CGOPT(bool, EnableUnsafeFPMath) CGOPT(bool, EnableNoInfsFPMath) CGOPT(bool, EnableNoNaNsFPMath) CGOPT(bool, EnableNoSignedZerosFPMath) @@ -219,12 +218,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { "Enable frame pointer elimination"))); CGBINDOPT(FramePointerUsage); - static cl::opt<bool> EnableUnsafeFPMath( - "enable-unsafe-fp-math", - cl::desc("Enable optimizations that may decrease FP precision"), - cl::init(false)); - CGBINDOPT(EnableUnsafeFPMath); - static cl::opt<bool> EnableNoInfsFPMath( "enable-no-infs-fp-math", cl::desc("Enable FP math optimizations that assume no +-Infs"), @@ -552,7 +545,6 @@ TargetOptions codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { TargetOptions Options; Options.AllowFPOpFusion = getFuseFPOps(); - Options.UnsafeFPMath = getEnableUnsafeFPMath(); Options.NoInfsFPMath = getEnableNoInfsFPMath(); Options.NoNaNsFPMath = getEnableNoNaNsFPMath(); 
Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath(); @@ -706,7 +698,6 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, if (getStackRealign()) NewAttrs.addAttribute("stackrealign"); - HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math"); HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math"); HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math"); HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math"); diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp index 5c78d98..99be1fc0 100644 --- a/llvm/lib/CodeGen/MIR2Vec.cpp +++ b/llvm/lib/CodeGen/MIR2Vec.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MIR2Vec.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Module.h" @@ -29,20 +30,30 @@ using namespace mir2vec; STATISTIC(MIRVocabMissCounter, "Number of lookups to MIR entities not present in the vocabulary"); -cl::OptionCategory llvm::mir2vec::MIR2VecCategory("MIR2Vec Options"); +namespace llvm { +namespace mir2vec { +cl::OptionCategory MIR2VecCategory("MIR2Vec Options"); // FIXME: Use a default vocab when not specified static cl::opt<std::string> VocabFile("mir2vec-vocab-path", cl::Optional, cl::desc("Path to the vocabulary file for MIR2Vec"), cl::init(""), cl::cat(MIR2VecCategory)); -cl::opt<float> - llvm::mir2vec::OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0), - cl::desc("Weight for machine opcode embeddings"), - cl::cat(MIR2VecCategory)); +cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0), + cl::desc("Weight for machine opcode embeddings"), + cl::cat(MIR2VecCategory)); +cl::opt<MIR2VecKind> MIR2VecEmbeddingKind( + "mir2vec-kind", cl::Optional, + cl::values(clEnumValN(MIR2VecKind::Symbolic, "symbolic", + "Generate symbolic embeddings for MIR")), + cl::init(MIR2VecKind::Symbolic), 
cl::desc("MIR2Vec embedding kind"), + cl::cat(MIR2VecCategory)); + +} // namespace mir2vec +} // namespace llvm //===----------------------------------------------------------------------===// -// Vocabulary Implementation +// Vocabulary //===----------------------------------------------------------------------===// MIRVocabulary::MIRVocabulary(VocabMap &&OpcodeEntries, @@ -188,6 +199,28 @@ void MIRVocabulary::buildCanonicalOpcodeMapping() { << " unique base opcodes\n"); } +Expected<MIRVocabulary> +MIRVocabulary::createDummyVocabForTest(const TargetInstrInfo &TII, + unsigned Dim) { + assert(Dim > 0 && "Dimension must be greater than zero"); + + float DummyVal = 0.1f; + + // Create dummy embeddings for all canonical opcode names + VocabMap DummyVocabMap; + for (unsigned Opcode = 0; Opcode < TII.getNumOpcodes(); ++Opcode) { + std::string BaseOpcode = extractBaseOpcodeName(TII.getName(Opcode)); + if (DummyVocabMap.count(BaseOpcode) == 0) { + // Only add if not already present + DummyVocabMap[BaseOpcode] = Embedding(Dim, DummyVal); + DummyVal += 0.1f; + } + } + + // Create and return vocabulary with dummy embeddings + return MIRVocabulary::create(std::move(DummyVocabMap), TII); +} + //===----------------------------------------------------------------------===// // MIR2VecVocabLegacyAnalysis Implementation //===----------------------------------------------------------------------===// @@ -258,7 +291,73 @@ MIR2VecVocabLegacyAnalysis::getMIR2VecVocabulary(const Module &M) { } //===----------------------------------------------------------------------===// -// Printer Passes Implementation +// MIREmbedder and its subclasses +//===----------------------------------------------------------------------===// + +std::unique_ptr<MIREmbedder> MIREmbedder::create(MIR2VecKind Mode, + const MachineFunction &MF, + const MIRVocabulary &Vocab) { + switch (Mode) { + case MIR2VecKind::Symbolic: + return std::make_unique<SymbolicMIREmbedder>(MF, Vocab); + } + return nullptr; +} + 
+Embedding MIREmbedder::computeEmbeddings(const MachineBasicBlock &MBB) const { + Embedding MBBVector(Dimension, 0); + + // Get instruction info for opcode name resolution + const auto &Subtarget = MF.getSubtarget(); + const auto *TII = Subtarget.getInstrInfo(); + if (!TII) { + MF.getFunction().getContext().emitError( + "MIR2Vec: No TargetInstrInfo available; cannot compute embeddings"); + return MBBVector; + } + + // Process each machine instruction in the basic block + for (const auto &MI : MBB) { + // Skip debug instructions and other metadata + if (MI.isDebugInstr()) + continue; + MBBVector += computeEmbeddings(MI); + } + + return MBBVector; +} + +Embedding MIREmbedder::computeEmbeddings() const { + Embedding MFuncVector(Dimension, 0); + + // Consider all reachable machine basic blocks in the function + for (const auto *MBB : depth_first(&MF)) + MFuncVector += computeEmbeddings(*MBB); + return MFuncVector; +} + +SymbolicMIREmbedder::SymbolicMIREmbedder(const MachineFunction &MF, + const MIRVocabulary &Vocab) + : MIREmbedder(MF, Vocab) {} + +std::unique_ptr<SymbolicMIREmbedder> +SymbolicMIREmbedder::create(const MachineFunction &MF, + const MIRVocabulary &Vocab) { + return std::make_unique<SymbolicMIREmbedder>(MF, Vocab); +} + +Embedding SymbolicMIREmbedder::computeEmbeddings(const MachineInstr &MI) const { + // Skip debug instructions and other metadata + if (MI.isDebugInstr()) + return Embedding(Dimension, 0); + + // Todo: Add operand/argument contributions + + return Vocab[MI.getOpcode()]; +} + +//===----------------------------------------------------------------------===// +// Printer Passes //===----------------------------------------------------------------------===// char MIR2VecVocabPrinterLegacyPass::ID = 0; @@ -297,3 +396,56 @@ MachineFunctionPass * llvm::createMIR2VecVocabPrinterLegacyPass(raw_ostream &OS) { return new MIR2VecVocabPrinterLegacyPass(OS); } + +char MIR2VecPrinterLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(MIR2VecPrinterLegacyPass, 
"print-mir2vec", + "MIR2Vec Embedder Printer Pass", false, true) +INITIALIZE_PASS_DEPENDENCY(MIR2VecVocabLegacyAnalysis) +INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass) +INITIALIZE_PASS_END(MIR2VecPrinterLegacyPass, "print-mir2vec", + "MIR2Vec Embedder Printer Pass", false, true) + +bool MIR2VecPrinterLegacyPass::runOnMachineFunction(MachineFunction &MF) { + auto &Analysis = getAnalysis<MIR2VecVocabLegacyAnalysis>(); + auto VocabOrErr = + Analysis.getMIR2VecVocabulary(*MF.getFunction().getParent()); + assert(VocabOrErr && "Failed to get MIR2Vec vocabulary"); + auto &MIRVocab = *VocabOrErr; + + auto Emb = mir2vec::MIREmbedder::create(MIR2VecEmbeddingKind, MF, MIRVocab); + if (!Emb) { + OS << "Error creating MIR2Vec embeddings for function " << MF.getName() + << "\n"; + return false; + } + + OS << "MIR2Vec embeddings for machine function " << MF.getName() << ":\n"; + OS << "Machine Function vector: "; + Emb->getMFunctionVector().print(OS); + + OS << "Machine basic block vectors:\n"; + for (const MachineBasicBlock &MBB : MF) { + OS << "Machine basic block: " << MBB.getFullName() << ":\n"; + Emb->getMBBVector(MBB).print(OS); + } + + OS << "Machine instruction vectors:\n"; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + // Skip debug instructions as they are not + // embedded + if (MI.isDebugInstr()) + continue; + + OS << "Machine instruction: "; + MI.print(OS); + Emb->getMInstVector(MI).print(OS); + } + } + + return false; +} + +MachineFunctionPass *llvm::createMIR2VecPrinterLegacyPass(raw_ostream &OS) { + return new MIR2VecPrinterLegacyPass(OS); +} diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 7acddff..729e73c 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -932,12 +932,11 @@ void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) { void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef) { auto Cost = 
calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); - for (const auto &RPIdAndCost : Cost) { - unsigned Class = RPIdAndCost.first; - if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second) + for (const auto &[Class, Weight] : Cost) { + if (static_cast<int>(RegPressure[Class]) < -Weight) RegPressure[Class] = 0; else - RegPressure[Class] += RPIdAndCost.second; + RegPressure[Class] += Weight; } } @@ -1215,11 +1214,10 @@ bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const { /// given cost matrix can cause high register pressure. bool MachineLICMImpl::CanCauseHighRegPressure( const SmallDenseMap<unsigned, int> &Cost, bool CheapInstr) { - for (const auto &RPIdAndCost : Cost) { - if (RPIdAndCost.second <= 0) + for (const auto &[Class, Weight] : Cost) { + if (Weight <= 0) continue; - unsigned Class = RPIdAndCost.first; int Limit = RegLimit[Class]; // Don't hoist cheap instructions if they would increase register pressure, @@ -1228,7 +1226,7 @@ bool MachineLICMImpl::CanCauseHighRegPressure( return true; for (const auto &RP : BackTrace) - if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit) + if (static_cast<int>(RP[Class]) + Weight >= Limit) return true; } @@ -1246,8 +1244,8 @@ void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) { // Update register pressure of blocks from loop header to current block. 
for (auto &RP : BackTrace) - for (const auto &RPIdAndCost : Cost) - RP[RPIdAndCost.first] += RPIdAndCost.second; + for (const auto &[Class, Weight] : Cost) + RP[Class] += Weight; } /// Return true if it is potentially profitable to hoist the given loop diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 437d0f4..bf1abfe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -3765,6 +3765,8 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: case ISD::LRINT: case ISD::LLRINT: + case ISD::LROUND: + case ISD::LLROUND: Res = SoftPromoteHalfOp_Op0WithStrict(N); break; case ISD::FP_TO_SINT_SAT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 88a4a8b..b1776ea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -429,7 +429,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { - SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op2 = N->getOperand(2); + switch (TLI.getExtendForAtomicRMWArg(N->getOpcode())) { + case ISD::SIGN_EXTEND: + Op2 = SExtPromotedInteger(Op2); + break; + case ISD::ZERO_EXTEND: + Op2 = ZExtPromotedInteger(Op2); + break; + case ISD::ANY_EXTEND: + Op2 = GetPromotedInteger(Op2); + break; + default: + llvm_unreachable("Invalid atomic op extension"); + } SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 5eb86e7..049efe8 100644 --- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -51,7 +51,7 @@ bool 
TargetOptions::FramePointerIsReserved(const MachineFunction &MF) const { /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume /// that the rounding mode of the FPU can change from its default. bool TargetOptions::HonorSignDependentRoundingFPMath() const { - return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; + return HonorSignDependentRoundingFPMathOption; } /// NOTE: There are targets that still do not support the debug entry values diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 10f915d..7e5e7b5 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -6045,6 +6045,120 @@ void llvm::UpgradeFunctionAttributes(Function &F) { } } +// Check if the function attribute is not present and set it. +static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, + StringRef Value) { + if (!F.hasFnAttribute(FnAttrName)) + F.addFnAttr(FnAttrName, Value); +} + +// Check if the function attribute is not present and set it if needed. +// If the attribute is "false" then removes it. +// If the attribute is "true" resets it to a valueless attribute. 
+static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) { + if (!F.hasFnAttribute(FnAttrName)) { + if (Set) + F.addFnAttr(FnAttrName); + } else { + auto A = F.getFnAttribute(FnAttrName); + if ("false" == A.getValueAsString()) + F.removeFnAttr(FnAttrName); + else if ("true" == A.getValueAsString()) { + F.removeFnAttr(FnAttrName); + F.addFnAttr(FnAttrName); + } + } +} + +void llvm::copyModuleAttrToFunctions(Module &M) { + Triple T(M.getTargetTriple()); + if (!T.isThumb() && !T.isARM() && !T.isAArch64()) + return; + + uint64_t BTEValue = 0; + uint64_t BPPLRValue = 0; + uint64_t GCSValue = 0; + uint64_t SRAValue = 0; + uint64_t SRAALLValue = 0; + uint64_t SRABKeyValue = 0; + + NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); + if (ModFlags) { + for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { + MDNode *Op = ModFlags->getOperand(I); + if (Op->getNumOperands() != 3) + continue; + + MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1)); + auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2)); + if (!ID || !CI) + continue; + + StringRef IDStr = ID->getString(); + uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue + : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue + : IDStr == "guarded-control-stack" ? &GCSValue + : IDStr == "sign-return-address" ? &SRAValue + : IDStr == "sign-return-address-all" ? &SRAALLValue + : IDStr == "sign-return-address-with-bkey" + ? 
&SRABKeyValue + : nullptr; + if (!ValPtr) + continue; + + *ValPtr = CI->getZExtValue(); + if (*ValPtr == 2) + return; + } + } + + bool BTE = BTEValue == 1; + bool BPPLR = BPPLRValue == 1; + bool GCS = GCSValue == 1; + bool SRA = SRAValue == 1; + + StringRef SignTypeValue = "non-leaf"; + if (SRA && SRAALLValue == 1) + SignTypeValue = "all"; + + StringRef SignKeyValue = "a_key"; + if (SRA && SRABKeyValue == 1) + SignKeyValue = "b_key"; + + for (Function &F : M.getFunctionList()) { + if (F.isDeclaration()) + continue; + + if (SRA) { + setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue); + setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue); + } else { + if (auto A = F.getFnAttribute("sign-return-address"); + A.isValid() && "none" == A.getValueAsString()) { + F.removeFnAttr("sign-return-address"); + F.removeFnAttr("sign-return-address-key"); + } + } + ConvertFunctionAttr(F, BTE, "branch-target-enforcement"); + ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr"); + ConvertFunctionAttr(F, GCS, "guarded-control-stack"); + } + + if (BTE) + M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2); + if (BPPLR) + M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2); + if (GCS) + M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2); + if (SRA) { + M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2); + if (SRAALLValue == 1) + M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2); + if (SRABKeyValue == 1) + M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2); + } +} + static bool isOldLoopArgument(Metadata *MD) { auto *T = dyn_cast_or_null<MDTuple>(MD); if (!T) diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index 1bff6cd..f78d9b0 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1512,6 +1512,11 @@ Error IRLinker::run() { // Loop over all of the linked values to compute type mappings. 
computeTypeMapping(); + // Convert module level attributes to function level attributes because + // after merging modules the attributes might change and would have different + // effect on the functions as the original module would have. + copyModuleAttrToFunctions(*SrcM); + std::reverse(Worklist.begin(), Worklist.end()); while (!Worklist.empty()) { GlobalValue *GV = Worklist.back(); @@ -1677,6 +1682,11 @@ IRMover::IRMover(Module &M) : Composite(M) { for (const auto *MD : StructTypes.getVisitedMetadata()) { SharedMDs[MD].reset(const_cast<MDNode *>(MD)); } + + // Convert module level attributes to function level attributes because + // after merging modules the attributes might change and would have different + // effect on the functions as the original module would have. + copyModuleAttrToFunctions(M); } Error IRMover::move(std::unique_ptr<Module> Src, diff --git a/llvm/lib/Support/AllocToken.cpp b/llvm/lib/Support/AllocToken.cpp new file mode 100644 index 0000000..95ecda2 --- /dev/null +++ b/llvm/lib/Support/AllocToken.cpp @@ -0,0 +1,50 @@ +//===- AllocToken.cpp - Allocation Token Calculation ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definition of AllocToken modes and shared calculation of stateless token IDs. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/AllocToken.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SipHash.h" + +using namespace llvm; + +static uint64_t getStableHash(const AllocTokenMetadata &Metadata, + uint64_t MaxTokens) { + return getStableSipHash(Metadata.TypeName) % MaxTokens; +} + +std::optional<uint64_t> llvm::getAllocToken(AllocTokenMode Mode, + const AllocTokenMetadata &Metadata, + uint64_t MaxTokens) { + assert(MaxTokens && "Must provide non-zero max tokens"); + + switch (Mode) { + case AllocTokenMode::Increment: + case AllocTokenMode::Random: + // Stateful modes cannot be implemented as a pure function. + return std::nullopt; + + case AllocTokenMode::TypeHash: + return getStableHash(Metadata, MaxTokens); + + case AllocTokenMode::TypeHashPointerSplit: { + if (MaxTokens == 1) + return 0; + const uint64_t HalfTokens = MaxTokens / 2; + uint64_t Hash = getStableHash(Metadata, HalfTokens); + if (Metadata.ContainsPointer) + Hash += HalfTokens; + return Hash; + } + } + + llvm_unreachable(""); +} diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 42b21b5..671a5fe 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -149,6 +149,7 @@ add_llvm_component_library(LLVMSupport AArch64BuildAttributes.cpp ARMAttributeParser.cpp ARMWinEH.cpp + AllocToken.cpp Allocator.cpp AutoConvert.cpp Base64.cpp diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 549c418..f74e52a 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -111,7 +111,7 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) { return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M); } -LLVM_ABI void SpecialCaseList::Matcher::preprocess(bool BySize) { +void SpecialCaseList::Matcher::preprocess(bool BySize) { return 
std::visit([&](auto &V) { return V.preprocess(BySize); }, M); } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 42ec8ba..7cce033 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in { } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in { - defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>; - defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>; - defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>; - defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>; + defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; + defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; + defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; + defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; } defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>; diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 1f773e2..3368a50 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -820,7 +820,7 @@ void ARMAsmPrinter::emitAttributes() { auto *BTIValue = mdconst::extract_or_null<ConstantInt>( SourceModule->getModuleFlag("branch-target-enforcement")); - if (BTIValue && BTIValue->isOne()) { + if (BTIValue && !BTIValue->isZero()) { // If "+pacbti" is used as an architecture extension, // Tag_BTI_extension is emitted in // ARMTargetStreamer::emitTargetAttributes(). 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 35e1127..b1a668e 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1089,7 +1089,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, // Register based DivRem for AEABI (RTABI 4.2) if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() || - TT.isTargetMuslAEABI() || TT.isOSWindows()) { + TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isOSWindows()) { setOperationAction(ISD::SREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); HasStandaloneRem = false; @@ -1353,6 +1353,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, setOperationAction(ISD::FLOG10, MVT::f16, Promote); setOperationAction(ISD::FLOG2, MVT::f16, Promote); setOperationAction(ISD::LRINT, MVT::f16, Expand); + setOperationAction(ISD::LROUND, MVT::f16, Expand); setOperationAction(ISD::FROUND, MVT::f16, Legal); setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); @@ -20574,7 +20575,7 @@ static TargetLowering::ArgListTy getDivRemArgList( SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || - Subtarget->isTargetWindows()) && + Subtarget->isTargetFuchsia() || Subtarget->isTargetWindows()) && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 406f4c1..597d311 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -1036,6 +1036,7 @@ bool LowOverheadLoop::ValidateLiveOuts() { while (!Worklist.empty()) { MachineInstr *MI = Worklist.pop_back_val(); if (MI->getOpcode() == ARM::MQPRCopy) { + 
LLVM_DEBUG(dbgs() << " Must generate copy as VMOV: " << *MI); VMOVCopies.insert(MI); MachineInstr *CopySrc = RDI.getUniqueReachingMIDef(MI, MI->getOperand(1).getReg()); @@ -1045,6 +1046,20 @@ bool LowOverheadLoop::ValidateLiveOuts() { LLVM_DEBUG(dbgs() << " Unable to handle live out: " << *MI); VMOVCopies.clear(); return false; + } else if (isVectorPredicated(MI)) { + // If this is a predicated instruction with merging semantics, + // check where it gets its false lanes from, if any. + int InactiveIdx = findVPTInactiveOperandIdx(*MI); + if (InactiveIdx != -1) { + SmallPtrSet<MachineInstr *, 2> Defs; + MachineInstr *FalseSrc = RDI.getUniqueReachingMIDef( + MI, MI->getOperand(InactiveIdx).getReg()); + if (FalseSrc) { + LLVM_DEBUG(dbgs() + << " Must check source of false lanes for: " << *MI); + Worklist.push_back(FalseSrc); + } + } } } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index b2d368e..4a0883c 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -343,6 +343,7 @@ public: bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); } bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); } bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); } + bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } bool isTargetWindows() const { return TargetTriple.isOSWindows(); } diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 431ce38..f5653d4 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -805,6 +805,16 @@ int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) { return -1; } +int llvm::findVPTInactiveOperandIdx(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + for (unsigned i = 0, e = 
MCID.getNumOperands(); i != e; ++i) + if (MCID.operands()[i].OperandType == ARM::OPERAND_VPRED_R) + return i + ARM::SUBOP_vpred_r_inactive; + + return -1; +} + ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg) { int PIdx = findFirstVPTPredOperandIdx(MI); diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 3ec3a621..1b0bf2d 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -90,6 +90,9 @@ inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) { Register PredReg; return getVPTInstrPredicate(MI, PredReg); } +// Identify the input operand in an MVE predicated instruction which +// contributes the values of any inactive vector lanes. +int findVPTInactiveOperandIdx(const MachineInstr &MI); // Recomputes the Block Mask of Instr, a VPT or VPST instruction. // This rebuilds the block mask of the instruction depending on the predicates diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp index c8866bf..42e90f0 100644 --- a/llvm/lib/Target/DirectX/DXILPrepare.cpp +++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp @@ -294,6 +294,14 @@ public: if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures")) RootSignature->eraseFromParent(); + // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and + // causes all tests using the DXIL Validator to fail. 
+ // + // This is a temporary fix and should be replaced with a whitelist once + // we have determined all metadata that the DXIL Validator allows + if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa")) + ErrNo->eraseFromParent(); + return true; } diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index a94e131..54c8972 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -117,8 +117,10 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - if (Subtarget.useHVX128BOps()) + if (Subtarget.useHVX128BOps()) { setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); + setOperationAction(ISD::BITCAST, MVT::v64i1, Custom); + } if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { @@ -2024,13 +2026,9 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { // Handle bitcast from i32, v2i16, and v4i8 to v32i1. // Splat the input into a 32-element i32 vector, then AND each element // with a unique bitmask to isolate individual bits. 
- if (ResTy == MVT::v32i1 && - (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) && - Subtarget.useHVX128BOps()) { - SDValue Val32 = Val; - if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8) - Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val); - + auto bitcastI32ToV32I1 = [&](SDValue Val32) { + assert(Val32.getValueType().getSizeInBits() == 32 && + "Input must be 32 bits"); MVT VecTy = MVT::getVectorVT(MVT::i32, 32); SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32); SmallVector<SDValue, 32> Mask; @@ -2039,7 +2037,31 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask); SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec); - return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded); + return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded); + }; + // === Case: v32i1 === + if (ResTy == MVT::v32i1 && + (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) && + Subtarget.useHVX128BOps()) { + SDValue Val32 = Val; + if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8) + Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val); + return bitcastI32ToV32I1(Val32); + } + // === Case: v64i1 === + if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) { + // Split i64 into lo/hi 32-bit halves. + SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val); + SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val, + DAG.getConstant(32, dl, MVT::i64)); + SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted); + + // Reuse the same 32-bit logic twice. + SDValue LoRes = bitcastI32ToV32I1(Lo); + SDValue HiRes = bitcastI32ToV32I1(Hi); + + // Concatenate into a v64i1 predicate. 
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes); } if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) { diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index e857b2d..edde7ac 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2406,7 +2406,8 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) { } bool RISCVAsmParser::generateVTypeError(SMLoc ErrorLoc) { - if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa)) + if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa) || + STI->hasFeature(RISCV::FeatureVendorXSfvfbfexp16e)) return Error( ErrorLoc, "operand must be " diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index b8ec0bb..4bea4c4 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -654,7 +654,10 @@ static constexpr FeatureBitset XqciFeatureGroup = { static constexpr FeatureBitset XSfVectorGroup = { RISCV::FeatureVendorXSfvcp, RISCV::FeatureVendorXSfvqmaccdod, RISCV::FeatureVendorXSfvqmaccqoq, RISCV::FeatureVendorXSfvfwmaccqqq, - RISCV::FeatureVendorXSfvfnrclipxfqf, RISCV::FeatureVendorXSfmmbase}; + RISCV::FeatureVendorXSfvfnrclipxfqf, RISCV::FeatureVendorXSfmmbase, + RISCV::FeatureVendorXSfvfexpa, RISCV::FeatureVendorXSfvfexpa64e, + RISCV::FeatureVendorXSfvfbfexp16e, RISCV::FeatureVendorXSfvfexp16e, + RISCV::FeatureVendorXSfvfexp32e}; static constexpr FeatureBitset XSfSystemGroup = { RISCV::FeatureVendorXSiFivecdiscarddlone, RISCV::FeatureVendorXSiFivecflushdlone, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 50f5a5d..7b9c4b3 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -220,7 +220,8 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo, if (RISCVVType::getVLMUL(Imm) == RISCVVType::VLMUL::LMUL_RESERVED || RISCVVType::getSEW(Imm) > 64 || (RISCVVType::isAltFmt(Imm) && - !STI.hasFeature(RISCV::FeatureStdExtZvfbfa)) || + !(STI.hasFeature(RISCV::FeatureStdExtZvfbfa) || + STI.hasFeature(RISCV::FeatureVendorXSfvfbfexp16e))) || (Imm >> 9) != 0) { O << formatImm(Imm); return; diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index 5dd4bf4..98b636e 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -109,12 +109,70 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, // expanded instructions for each pseudo is correct in the Size field of the // tablegen definition for the pseudo. switch (MBBI->getOpcode()) { + case RISCV::PseudoAtomicSwap32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32, + NextMBBI); + case RISCV::PseudoAtomicSwap64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadAdd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadAdd64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadSub32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadSub64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadAnd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadAnd64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadOr32: + return 
expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI); + case RISCV::PseudoAtomicLoadOr64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64, NextMBBI); + case RISCV::PseudoAtomicLoadXor32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadXor64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64, + NextMBBI); case RISCV::PseudoAtomicLoadNand32: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32, NextMBBI); case RISCV::PseudoAtomicLoadNand64: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64, NextMBBI); + case RISCV::PseudoAtomicLoadMin32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadMin64: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadMax32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadMax64: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadUMin32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadUMin64: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadUMax32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadUMax64: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64, + NextMBBI); case RISCV::PseudoMaskedAtomicSwap32: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32, NextMBBI); @@ -277,6 +335,36 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, switch (BinOp) { default: llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + BuildMI(LoopMBB, DL, 
TII->get(RISCV::ADDI), ScratchReg) + .addReg(IncrReg) + .addImm(0); + break; + case AtomicRMWInst::Add: + BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Sub: + BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::And: + BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Or: + BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Xor: + BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; case AtomicRMWInst::Nand: BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg) .addReg(DestReg) @@ -433,38 +521,85 @@ static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL, .addReg(ShamtReg); } -bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, - MachineBasicBlock::iterator &NextMBBI) { - assert(IsMasked == true && - "Should only need to expand masked atomic max/min"); - assert(Width == 32 && "Should never need to expand masked 64-bit operations"); +static void doAtomicMinMaxOpExpansion( + const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB, + MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB, + MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width, + const RISCVSubtarget *STI) { + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); + AtomicOrdering Ordering = + static_cast<AtomicOrdering>(MI.getOperand(4).getImm()); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - 
MachineFunction *MF = MBB.getParent(); - auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + // .loophead: + // lr.[w|d] dest, (addr) + // mv scratch, dest + // ifnochangeneeded scratch, incr, .looptail + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg) + .addReg(AddrReg); + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg) + .addReg(DestReg) + .addImm(0); + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Max: { + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE)) + .addReg(ScratchReg) + .addReg(IncrReg) + .addMBB(LoopTailMBB); + break; + } + case AtomicRMWInst::Min: { + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE)) + .addReg(IncrReg) + .addReg(ScratchReg) + .addMBB(LoopTailMBB); + break; + } + case AtomicRMWInst::UMax: + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU)) + .addReg(ScratchReg) + .addReg(IncrReg) + .addMBB(LoopTailMBB); + break; + case AtomicRMWInst::UMin: + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU)) + .addReg(IncrReg) + .addReg(ScratchReg) + .addMBB(LoopTailMBB); + break; + } - // Insert new MBBs. - MF->insert(++MBB.getIterator(), LoopHeadMBB); - MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); - MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); - MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + // .loopifbody: + // mv scratch, incr + BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg) + .addReg(IncrReg) + .addImm(0); - // Set up successors and transfer remaining instructions to DoneMBB. 
- LoopHeadMBB->addSuccessor(LoopIfBodyMBB); - LoopHeadMBB->addSuccessor(LoopTailMBB); - LoopIfBodyMBB->addSuccessor(LoopTailMBB); - LoopTailMBB->addSuccessor(LoopHeadMBB); - LoopTailMBB->addSuccessor(DoneMBB); - DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); - DoneMBB->transferSuccessors(&MBB); - MBB.addSuccessor(LoopHeadMBB); + // .looptail: + // sc.[w|d] scratch, scratch, (addr) + // bnez scratch, loop + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), + ScratchReg) + .addReg(ScratchReg) + .addReg(AddrReg); + BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) + .addReg(ScratchReg) + .addReg(RISCV::X0) + .addMBB(LoopHeadMBB); +} +static void doMaskedAtomicMinMaxOpExpansion( + const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB, + MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB, + MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width, + const RISCVSubtarget *STI) { + assert(Width == 32 && "Should never need to expand masked 64-bit operations"); Register DestReg = MI.getOperand(0).getReg(); Register Scratch1Reg = MI.getOperand(1).getReg(); Register Scratch2Reg = MI.getOperand(2).getReg(); @@ -541,6 +676,44 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( .addReg(Scratch1Reg) .addReg(RISCV::X0) .addMBB(LoopHeadMBB); +} + +bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI) { + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. 
+ MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); + MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); + MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopHeadMBB->addSuccessor(LoopIfBodyMBB); + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopIfBodyMBB->addSuccessor(LoopTailMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); + LoopTailMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); + + if (!IsMasked) + doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB, + LoopTailMBB, DoneMBB, BinOp, Width, STI); + else + doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, + LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp, + Width, STI); NextMBBI = MBB.end(); MI.eraseFromParent(); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 19992e6..9e6b7f0 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -218,6 +218,7 @@ def HasStdExtZaamo : Predicate<"Subtarget->hasStdExtZaamo()">, AssemblerPredicate<(any_of FeatureStdExtZaamo), "'Zaamo' (Atomic Memory Operations)">; +def NoStdExtZaamo : Predicate<"!Subtarget->hasStdExtZaamo()">; def FeatureStdExtZalrsc : RISCVExtension<1, 0, "Load-Reserved/Store-Conditional">; @@ -1334,6 +1335,44 @@ def HasVendorXSfvfnrclipxfqf AssemblerPredicate<(all_of FeatureVendorXSfvfnrclipxfqf), "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)">; +// Note: XSfvfbfexp16e depends on either Zvfbfmin _or_ Zvfbfa, which cannot be expressed here in +// TableGen. Instead, we check that in RISCVISAInfo. 
+def FeatureVendorXSfvfbfexp16e + : RISCVExtension<0, 5, + "SiFive Vector Floating-Point Exponential Function Instruction, BFloat16">; +def HasVendorXSfvfbfexp16e : Predicate<"Subtarget->hasVendorXSfvfbfexp16e()">; + +def FeatureVendorXSfvfexp16e + : RISCVExtension<0, 5, + "SiFive Vector Floating-Point Exponential Function Instruction, Half Precision", + [FeatureStdExtZvfh]>; +def HasVendorXSfvfexp16e : Predicate<"Subtarget->hasVendorXSfvfexp16e()">; + +def FeatureVendorXSfvfexp32e + : RISCVExtension<0, 5, + "SiFive Vector Floating-Point Exponential Function Instruction, Single Precision", + [FeatureStdExtZve32f]>; +def HasVendorXSfvfexp32e : Predicate<"Subtarget->hasVendorXSfvfexp32e()">; + +def HasVendorXSfvfexpAnyFloat : Predicate<"Subtarget->hasVendorXSfvfexp16e() || Subtarget->hasVendorXSfvfexp32e()">; +def HasVendorXSfvfexpAny : Predicate<"Subtarget->hasVendorXSfvfbfexp16e() || Subtarget->hasVendorXSfvfexp16e() || Subtarget->hasVendorXSfvfexp32e()">, + AssemblerPredicate<(any_of FeatureVendorXSfvfbfexp16e, FeatureVendorXSfvfexp16e, FeatureVendorXSfvfexp32e), + "'Xsfvfbfexp16e', 'Xsfvfexp16e', or 'Xsfvfexp32e' (SiFive Vector Floating-Point Exponential Function Instruction)">; + +def FeatureVendorXSfvfexpa + : RISCVExtension<0, 2, + "SiFive Vector Floating-Point Exponential Approximation Instruction", + [FeatureStdExtZve32f]>; +def HasVendorXSfvfexpa : Predicate<"Subtarget->hasVendorXSfvfexpa()">, + AssemblerPredicate<(all_of FeatureVendorXSfvfexpa), + "'Xsfvfexpa' (SiFive Vector Floating-Point Exponential Approximation Instruction)">; + +def FeatureVendorXSfvfexpa64e + : RISCVExtension<0, 2, + "SiFive Vector Floating-Point Exponential Approximation Instruction with Double-Precision", + [FeatureVendorXSfvfexpa, FeatureStdExtZve64d]>; +def HasVendorXSfvfexpa64e : Predicate<"Subtarget->hasVendorXSfvfexpa64e()">; + def FeatureVendorXSiFivecdiscarddlone : RISCVExtension<1, 0, "SiFive sf.cdiscard.d.l1 Instruction", []>; @@ -1864,7 +1903,7 @@ def FeatureForcedAtomics 
: SubtargetFeature< "forced-atomics", "HasForcedAtomics", "true", "Assume that lock-free native-width atomics are available">; def HasAtomicLdSt - : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">; + : Predicate<"Subtarget->hasStdExtZalrsc() || Subtarget->hasForcedAtomics()">; def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", "AllowTaggedGlobals", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a77d765..26fe9ed 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -688,7 +688,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, else if (Subtarget.hasStdExtZicbop()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); - if (Subtarget.hasStdExtA()) { + if (Subtarget.hasStdExtZalrsc()) { setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) setMinCmpXchgSizeInBits(8); @@ -1558,7 +1558,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } } - if (Subtarget.hasStdExtA()) + if (Subtarget.hasStdExtZaamo()) setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand); if (Subtarget.hasForcedAtomics()) { @@ -21875,7 +21875,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( // result is then sign extended to XLEN. With +A, the minimum width is // 32 for both 64 and 32. assert(getMinCmpXchgSizeInBits() == 32); - assert(Subtarget.hasStdExtA()); + assert(Subtarget.hasStdExtZalrsc()); return Op.getValueSizeInBits() - 31; } break; @@ -24471,6 +24471,25 @@ ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; } +ISD::NodeType RISCVTargetLowering::getExtendForAtomicRMWArg(unsigned Op) const { + // Zaamo will use amo<op>.w which does not require extension. 
+ if (Subtarget.hasStdExtZaamo() || Subtarget.hasForcedAtomics()) + return ISD::ANY_EXTEND; + + // Zalrsc pseudo expansions with comparison require sign-extension. + assert(Subtarget.hasStdExtZalrsc()); + switch (Op) { + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return ISD::SIGN_EXTEND; + default: + break; + } + return ISD::ANY_EXTEND; +} + Register RISCVTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return RISCV::X10; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 3f81ed7..9e3e2a9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -245,6 +245,7 @@ public: } ISD::NodeType getExtendForAtomicCmpSwapArg() const override; + ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const override; bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 12f776b..912b82d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1689,42 +1689,44 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp, // instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END. // TODO: Support more operations. 
unsigned getPredicatedOpcode(unsigned Opcode) { + // clang-format off switch (Opcode) { - case RISCV::ADD: return RISCV::PseudoCCADD; break; - case RISCV::SUB: return RISCV::PseudoCCSUB; break; - case RISCV::SLL: return RISCV::PseudoCCSLL; break; - case RISCV::SRL: return RISCV::PseudoCCSRL; break; - case RISCV::SRA: return RISCV::PseudoCCSRA; break; - case RISCV::AND: return RISCV::PseudoCCAND; break; - case RISCV::OR: return RISCV::PseudoCCOR; break; - case RISCV::XOR: return RISCV::PseudoCCXOR; break; - - case RISCV::ADDI: return RISCV::PseudoCCADDI; break; - case RISCV::SLLI: return RISCV::PseudoCCSLLI; break; - case RISCV::SRLI: return RISCV::PseudoCCSRLI; break; - case RISCV::SRAI: return RISCV::PseudoCCSRAI; break; - case RISCV::ANDI: return RISCV::PseudoCCANDI; break; - case RISCV::ORI: return RISCV::PseudoCCORI; break; - case RISCV::XORI: return RISCV::PseudoCCXORI; break; - - case RISCV::ADDW: return RISCV::PseudoCCADDW; break; - case RISCV::SUBW: return RISCV::PseudoCCSUBW; break; - case RISCV::SLLW: return RISCV::PseudoCCSLLW; break; - case RISCV::SRLW: return RISCV::PseudoCCSRLW; break; - case RISCV::SRAW: return RISCV::PseudoCCSRAW; break; - - case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break; - case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break; - case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break; - case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break; - - case RISCV::ANDN: return RISCV::PseudoCCANDN; break; - case RISCV::ORN: return RISCV::PseudoCCORN; break; - case RISCV::XNOR: return RISCV::PseudoCCXNOR; break; - - case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS; break; - case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ; break; + case RISCV::ADD: return RISCV::PseudoCCADD; + case RISCV::SUB: return RISCV::PseudoCCSUB; + case RISCV::SLL: return RISCV::PseudoCCSLL; + case RISCV::SRL: return RISCV::PseudoCCSRL; + case RISCV::SRA: return RISCV::PseudoCCSRA; + case RISCV::AND: return RISCV::PseudoCCAND; + case RISCV::OR: return 
RISCV::PseudoCCOR; + case RISCV::XOR: return RISCV::PseudoCCXOR; + + case RISCV::ADDI: return RISCV::PseudoCCADDI; + case RISCV::SLLI: return RISCV::PseudoCCSLLI; + case RISCV::SRLI: return RISCV::PseudoCCSRLI; + case RISCV::SRAI: return RISCV::PseudoCCSRAI; + case RISCV::ANDI: return RISCV::PseudoCCANDI; + case RISCV::ORI: return RISCV::PseudoCCORI; + case RISCV::XORI: return RISCV::PseudoCCXORI; + + case RISCV::ADDW: return RISCV::PseudoCCADDW; + case RISCV::SUBW: return RISCV::PseudoCCSUBW; + case RISCV::SLLW: return RISCV::PseudoCCSLLW; + case RISCV::SRLW: return RISCV::PseudoCCSRLW; + case RISCV::SRAW: return RISCV::PseudoCCSRAW; + + case RISCV::ADDIW: return RISCV::PseudoCCADDIW; + case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; + case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; + case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; + + case RISCV::ANDN: return RISCV::PseudoCCANDN; + case RISCV::ORN: return RISCV::PseudoCCORN; + case RISCV::XNOR: return RISCV::PseudoCCXNOR; + + case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS; + case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ; } + // clang-format on return RISCV::INSTRUCTION_LIST_END; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 571d72f..5c81a09 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -158,9 +158,9 @@ class seq_cst_store<PatFrag base> } } // IsAtomic = 1 -// Atomic load/store are available under both +a and +force-atomics. -// Fences will be inserted for atomic load/stores according to the logic in -// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}. +// Atomic load/store are available under +zalrsc (thus also +a) and +// +force-atomics. Fences will be inserted for atomic load/stores according to +// the logic in RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}. // The normal loads/stores are relaxed (unordered) loads/stores that don't have // any ordering. 
This is necessary because AtomicExpandPass has added fences to // atomic load/stores and changed them to unordered ones. @@ -308,7 +308,65 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst> (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, timm:$ordering)>; -let Predicates = [HasStdExtA] in { +let Predicates = [HasStdExtZalrsc, NoStdExtZaamo] in { + +let Size = 16 in { +def PseudoAtomicSwap32 : PseudoAMO; +def PseudoAtomicLoadAdd32 : PseudoAMO; +def PseudoAtomicLoadSub32 : PseudoAMO; +def PseudoAtomicLoadAnd32 : PseudoAMO; +def PseudoAtomicLoadOr32 : PseudoAMO; +def PseudoAtomicLoadXor32 : PseudoAMO; +} // Size = 16 +let Size = 24 in { +def PseudoAtomicLoadMax32 : PseudoAMO; +def PseudoAtomicLoadMin32 : PseudoAMO; +def PseudoAtomicLoadUMax32 : PseudoAMO; +def PseudoAtomicLoadUMin32 : PseudoAMO; +} // Size = 24 + +defm : PseudoAMOPat<"atomic_swap_i32", PseudoAtomicSwap32>; +defm : PseudoAMOPat<"atomic_load_add_i32", PseudoAtomicLoadAdd32>; +defm : PseudoAMOPat<"atomic_load_sub_i32", PseudoAtomicLoadSub32>; +defm : PseudoAMOPat<"atomic_load_and_i32", PseudoAtomicLoadAnd32>; +defm : PseudoAMOPat<"atomic_load_or_i32", PseudoAtomicLoadOr32>; +defm : PseudoAMOPat<"atomic_load_xor_i32", PseudoAtomicLoadXor32>; +defm : PseudoAMOPat<"atomic_load_max_i32", PseudoAtomicLoadMax32>; +defm : PseudoAMOPat<"atomic_load_min_i32", PseudoAtomicLoadMin32>; +defm : PseudoAMOPat<"atomic_load_umax_i32", PseudoAtomicLoadUMax32>; +defm : PseudoAMOPat<"atomic_load_umin_i32", PseudoAtomicLoadUMin32>; +} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo] + +let Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64] in { + +let Size = 16 in { +def PseudoAtomicSwap64 : PseudoAMO; +def PseudoAtomicLoadAdd64 : PseudoAMO; +def PseudoAtomicLoadSub64 : PseudoAMO; +def PseudoAtomicLoadAnd64 : PseudoAMO; +def PseudoAtomicLoadOr64 : PseudoAMO; +def PseudoAtomicLoadXor64 : PseudoAMO; +} // Size = 16 +let Size = 24 in { +def PseudoAtomicLoadMax64 : PseudoAMO; +def 
PseudoAtomicLoadMin64 : PseudoAMO; +def PseudoAtomicLoadUMax64 : PseudoAMO; +def PseudoAtomicLoadUMin64 : PseudoAMO; +} // Size = 24 + +defm : PseudoAMOPat<"atomic_swap_i64", PseudoAtomicSwap64, i64>; +defm : PseudoAMOPat<"atomic_load_add_i64", PseudoAtomicLoadAdd64, i64>; +defm : PseudoAMOPat<"atomic_load_sub_i64", PseudoAtomicLoadSub64, i64>; +defm : PseudoAMOPat<"atomic_load_and_i64", PseudoAtomicLoadAnd64, i64>; +defm : PseudoAMOPat<"atomic_load_or_i64", PseudoAtomicLoadOr64, i64>; +defm : PseudoAMOPat<"atomic_load_xor_i64", PseudoAtomicLoadXor64, i64>; +defm : PseudoAMOPat<"atomic_load_max_i64", PseudoAtomicLoadMax64, i64>; +defm : PseudoAMOPat<"atomic_load_min_i64", PseudoAtomicLoadMin64, i64>; +defm : PseudoAMOPat<"atomic_load_umax_i64", PseudoAtomicLoadUMax64, i64>; +defm : PseudoAMOPat<"atomic_load_umin_i64", PseudoAtomicLoadUMin64, i64>; +} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64] + +let Predicates = [HasStdExtZalrsc] in { let Size = 20 in def PseudoAtomicLoadNand32 : PseudoAMO; @@ -347,14 +405,14 @@ def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax, PseudoMaskedAtomicLoadUMax32>; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin, PseudoMaskedAtomicLoadUMin32>; -} // Predicates = [HasStdExtA] +} // Predicates = [HasStdExtZalrsc] -let Predicates = [HasStdExtA, IsRV64] in { +let Predicates = [HasStdExtZalrsc, IsRV64] in { let Size = 20 in def PseudoAtomicLoadNand64 : PseudoAMO; defm : PseudoAMOPat<"atomic_load_nand_i64", PseudoAtomicLoadNand64, i64>; -} // Predicates = [HasStdExtA, IsRV64] +} // Predicates = [HasStdExtZalrsc, IsRV64] /// Compare and exchange @@ -385,17 +443,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst, (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; } -let Predicates = [HasStdExtA, NoStdExtZacas] in { +let Predicates = [HasStdExtZalrsc, NoStdExtZacas] in { def PseudoCmpXchg32 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>; } -let Predicates = [HasStdExtA, 
NoStdExtZacas, IsRV64] in { +let Predicates = [HasStdExtZalrsc, NoStdExtZacas, IsRV64] in { def PseudoCmpXchg64 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_i64", PseudoCmpXchg64, i64>; } -let Predicates = [HasStdExtA] in { +let Predicates = [HasStdExtZalrsc] in { def PseudoMaskedCmpXchg32 : Pseudo<(outs GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, @@ -412,4 +470,4 @@ def : Pat<(XLenVT (int_riscv_masked_cmpxchg (XLenVT GPR:$mask), (XLenVT timm:$ordering))), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; -} // Predicates = [HasStdExtA] +} // Predicates = [HasStdExtZalrsc] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 6a4119a..4104abd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -217,6 +217,14 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0, defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR, VR, FPR32>, Sched<[]>; } +let Predicates = [HasVendorXSfvfexpAny], DecoderNamespace = "XSfvector" in { + def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">; +} + +let Predicates = [HasVendorXSfvfexpa], DecoderNamespace = "XSfvector" in { + def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">; +} + let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvector", DestEEW = EEWSEWx4, RVVConstraint=VS2Constraint in { def SF_VQMACCU_2x8x2 : CustomSiFiveVMACC<0b101100, OPMVV, "sf.vqmaccu.2x8x2">; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 5591d9f..021353a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -355,9 +355,9 @@ private: SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const; bool extractSubvector(Register &ResVReg, const SPIRVType *ResType, 
Register &ReadReg, MachineInstr &InsertionPoint) const; - bool generateImageRead(Register &ResVReg, const SPIRVType *ResType, - Register ImageReg, Register IdxReg, DebugLoc Loc, - MachineInstr &Pos) const; + bool generateImageReadOrFetch(Register &ResVReg, const SPIRVType *ResType, + Register ImageReg, Register IdxReg, + DebugLoc Loc, MachineInstr &Pos) const; bool BuildCOPY(Register DestReg, Register SrcReg, MachineInstr &I) const; bool loadVec3BuiltinInputID(SPIRV::BuiltIn::BuiltIn BuiltInValue, Register ResVReg, const SPIRVType *ResType, @@ -1321,8 +1321,8 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg, } Register IdxReg = IntPtrDef->getOperand(3).getReg(); - return generateImageRead(ResVReg, ResType, NewHandleReg, IdxReg, - I.getDebugLoc(), I); + return generateImageReadOrFetch(ResVReg, ResType, NewHandleReg, IdxReg, + I.getDebugLoc(), I); } } @@ -3639,27 +3639,33 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic( DebugLoc Loc = I.getDebugLoc(); MachineInstr &Pos = I; - return generateImageRead(ResVReg, ResType, NewImageReg, IdxReg, Loc, Pos); + return generateImageReadOrFetch(ResVReg, ResType, NewImageReg, IdxReg, Loc, + Pos); } -bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg, - const SPIRVType *ResType, - Register ImageReg, - Register IdxReg, DebugLoc Loc, - MachineInstr &Pos) const { +bool SPIRVInstructionSelector::generateImageReadOrFetch( + Register &ResVReg, const SPIRVType *ResType, Register ImageReg, + Register IdxReg, DebugLoc Loc, MachineInstr &Pos) const { SPIRVType *ImageType = GR.getSPIRVTypeForVReg(ImageReg); assert(ImageType && ImageType->getOpcode() == SPIRV::OpTypeImage && "ImageReg is not an image type."); + bool IsSignedInteger = sampledTypeIsSignedInteger(GR.getTypeForSPIRVType(ImageType)); + // Check if the "sampled" operand of the image type is 1. 
+ // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpImageFetch + auto SampledOp = ImageType->getOperand(6); + bool IsFetch = (SampledOp.getImm() == 1); uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType); if (ResultSize == 4) { - auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(ImageReg) - .addUse(IdxReg); + auto BMI = + BuildMI(*Pos.getParent(), Pos, Loc, + TII.get(IsFetch ? SPIRV::OpImageFetch : SPIRV::OpImageRead)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(ImageReg) + .addUse(IdxReg); if (IsSignedInteger) BMI.addImm(0x1000); // SignExtend @@ -3668,11 +3674,13 @@ bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg, SPIRVType *ReadType = widenTypeToVec4(ResType, Pos); Register ReadReg = MRI->createVirtualRegister(GR.getRegClass(ReadType)); - auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead)) - .addDef(ReadReg) - .addUse(GR.getSPIRVTypeID(ReadType)) - .addUse(ImageReg) - .addUse(IdxReg); + auto BMI = + BuildMI(*Pos.getParent(), Pos, Loc, + TII.get(IsFetch ? 
SPIRV::OpImageFetch : SPIRV::OpImageRead)) + .addDef(ReadReg) + .addUse(GR.getSPIRVTypeID(ReadType)) + .addUse(ImageReg) + .addUse(IdxReg); if (IsSignedInteger) BMI.addImm(0x1000); // SignExtend bool Succeed = BMI.constrainAllUses(TII, TRI, RBI); diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp index cf85691..9bda8a4 100644 --- a/llvm/lib/Target/TargetMachine.cpp +++ b/llvm/lib/Target/TargetMachine.cpp @@ -158,7 +158,6 @@ void TargetMachine::resetTargetOptions(const Function &F) const { Options.X = F.getFnAttribute(Y).getValueAsBool(); \ } while (0) - RESET_OPTION(UnsafeFPMath, "unsafe-fp-math"); RESET_OPTION(NoInfsFPMath, "no-infs-fp-math"); RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math"); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b5f8ee5..b54a1e7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29516,11 +29516,8 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget, if (IgnoreNaN || DAG.isKnownNeverNaN(IsNum ? NewY : NewX)) return MinMax; - if (DAG.isKnownNeverNaN(NewX)) - NewX = NewY; - - SDValue IsNaN = - DAG.getSetCC(DL, SetCCType, NewX, NewX, IsNum ? ISD::SETO : ISD::SETUO); + SDValue NaNSrc = IsNum ? 
MinMax : NewX; + SDValue IsNaN = DAG.getSetCC(DL, SetCCType, NaNSrc, NaNSrc, ISD::SETUO); return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax); } diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp index 7882045..0fce5b9 100644 --- a/llvm/lib/TargetParser/ARMTargetParser.cpp +++ b/llvm/lib/TargetParser/ARMTargetParser.cpp @@ -567,8 +567,8 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT) { default: if (TT.isOSNetBSD()) return "apcs-gnu"; - if (TT.isOSFreeBSD() || TT.isOSOpenBSD() || TT.isOSHaiku() || - TT.isOHOSFamily()) + if (TT.isOSFreeBSD() || TT.isOSFuchsia() || TT.isOSOpenBSD() || + TT.isOSHaiku() || TT.isOHOSFamily()) return "aapcs-linux"; return "aapcs"; } @@ -648,6 +648,8 @@ StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) { } case llvm::Triple::OpenBSD: return "cortex-a8"; + case llvm::Triple::Fuchsia: + return "cortex-a53"; default: switch (Triple.getEnvironment()) { case llvm::Triple::EABIHF: diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index 31126cc..f08a0c0 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -765,6 +765,12 @@ Error RISCVISAInfo::checkDependency() { if (HasZvl && !HasVector) return getExtensionRequiresError("zvl*b", "v' or 'zve*"); + if (Exts.count("xsfvfbfexp16e") && + !(Exts.count("zvfbfmin") || Exts.count("zvfbfa"))) + return createStringError(errc::invalid_argument, + "'xsfvfbfexp16e' requires 'zvfbfmin' or " + "'zvfbfa' extension to also be specified"); + if (HasD && (HasC || Exts.count("zcd"))) for (auto Ext : ZcdOverlaps) if (Exts.count(Ext.str())) diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 46fb567..aa1346d 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1271,7 +1271,7 @@ bool LowerTypeTestsModule::hasBranchTargetEnforcement() { // 
the module flags. if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( M.getModuleFlag("branch-target-enforcement"))) - HasBranchTargetEnforcement = (BTE->getZExtValue() != 0); + HasBranchTargetEnforcement = !BTE->isZero(); else HasBranchTargetEnforcement = 0; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 975498f..5aa8de3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3455,27 +3455,45 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { // select a, false, b -> select !a, b, false if (match(TrueVal, m_Specific(Zero))) { Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); - return SelectInst::Create(NotCond, FalseVal, Zero); + Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI; + SelectInst *NewSI = + SelectInst::Create(NotCond, FalseVal, Zero, "", nullptr, MDFrom); + NewSI->swapProfMetadata(); + return NewSI; } // select a, b, true -> select !a, true, b if (match(FalseVal, m_Specific(One))) { Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); - return SelectInst::Create(NotCond, One, TrueVal); + Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI; + SelectInst *NewSI = + SelectInst::Create(NotCond, One, TrueVal, "", nullptr, MDFrom); + NewSI->swapProfMetadata(); + return NewSI; } // DeMorgan in select form: !a && !b --> !(a || b) // select !a, !b, false --> not (select a, true, b) if (match(&SI, m_LogicalAnd(m_Not(m_Value(A)), m_Not(m_Value(B)))) && (CondVal->hasOneUse() || TrueVal->hasOneUse()) && - !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) - return BinaryOperator::CreateNot(Builder.CreateSelect(A, One, B)); + !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) { + Instruction *MDFrom = ProfcheckDisableMetadataFixes ? 
nullptr : &SI; + SelectInst *NewSI = + cast<SelectInst>(Builder.CreateSelect(A, One, B, "", MDFrom)); + NewSI->swapProfMetadata(); + return BinaryOperator::CreateNot(NewSI); + } // DeMorgan in select form: !a || !b --> !(a && b) // select !a, true, !b --> not (select a, b, false) if (match(&SI, m_LogicalOr(m_Not(m_Value(A)), m_Not(m_Value(B)))) && (CondVal->hasOneUse() || FalseVal->hasOneUse()) && - !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) - return BinaryOperator::CreateNot(Builder.CreateSelect(A, B, Zero)); + !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) { + Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI; + SelectInst *NewSI = + cast<SelectInst>(Builder.CreateSelect(A, B, Zero, "", MDFrom)); + NewSI->swapProfMetadata(); + return BinaryOperator::CreateNot(NewSI); + } // select (select a, true, b), true, b -> select a, true, b if (match(CondVal, m_Select(m_Value(A), m_One(), m_Value(B))) && diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp index 40720ae..0873845 100644 --- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp +++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp @@ -31,10 +31,12 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" +#include "llvm/Support/AllocToken.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -53,29 +55,12 @@ #include <variant> using namespace llvm; +using TokenMode = AllocTokenMode; #define DEBUG_TYPE "alloc-token" namespace { -//===--- Constants --------------------------------------------------------===// - -enum class TokenMode : unsigned { - /// Incrementally increasing token ID. 
- Increment = 0, - - /// Simple mode that returns a statically-assigned random token ID. - Random = 1, - - /// Token ID based on allocated type hash. - TypeHash = 2, - - /// Token ID based on allocated type hash, where the top half ID-space is - /// reserved for types that contain pointers and the bottom half for types - /// that do not contain pointers. - TypeHashPointerSplit = 3, -}; - //===--- Command-line options ---------------------------------------------===// cl::opt<TokenMode> ClMode( @@ -131,7 +116,7 @@ cl::opt<uint64_t> ClFallbackToken( //===--- Statistics -------------------------------------------------------===// -STATISTIC(NumFunctionsInstrumented, "Functions instrumented"); +STATISTIC(NumFunctionsModified, "Functions modified"); STATISTIC(NumAllocationsInstrumented, "Allocations instrumented"); //===----------------------------------------------------------------------===// @@ -140,9 +125,19 @@ STATISTIC(NumAllocationsInstrumented, "Allocations instrumented"); /// /// Expected format is: !{<type-name>, <contains-pointer>} MDNode *getAllocTokenMetadata(const CallBase &CB) { - MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token); - if (!Ret) - return nullptr; + MDNode *Ret = nullptr; + if (auto *II = dyn_cast<IntrinsicInst>(&CB); + II && II->getIntrinsicID() == Intrinsic::alloc_token_id) { + auto *MDV = cast<MetadataAsValue>(II->getArgOperand(0)); + Ret = cast<MDNode>(MDV->getMetadata()); + // If the intrinsic has an empty MDNode, type inference failed. + if (Ret->getNumOperands() == 0) + return nullptr; + } else { + Ret = CB.getMetadata(LLVMContext::MD_alloc_token); + if (!Ret) + return nullptr; + } assert(Ret->getNumOperands() == 2 && "bad !alloc_token"); assert(isa<MDString>(Ret->getOperand(0))); assert(isa<ConstantAsMetadata>(Ret->getOperand(1))); @@ -206,22 +201,19 @@ public: using ModeBase::ModeBase; uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { - const auto [N, H] = getHash(CB, ORE); - return N ? 
boundedToken(H) : H; - } -protected: - std::pair<MDNode *, uint64_t> getHash(const CallBase &CB, - OptimizationRemarkEmitter &ORE) { if (MDNode *N = getAllocTokenMetadata(CB)) { MDString *S = cast<MDString>(N->getOperand(0)); - return {N, getStableSipHash(S->getString())}; + AllocTokenMetadata Metadata{S->getString(), containsPointer(N)}; + if (auto Token = getAllocToken(TokenMode::TypeHash, Metadata, MaxTokens)) + return *Token; } // Fallback. remarkNoMetadata(CB, ORE); - return {nullptr, ClFallbackToken}; + return ClFallbackToken; } +protected: /// Remark that there was no precise type information. static void remarkNoMetadata(const CallBase &CB, OptimizationRemarkEmitter &ORE) { @@ -242,20 +234,18 @@ public: using TypeHashMode::TypeHashMode; uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { - if (MaxTokens == 1) - return 0; - const uint64_t HalfTokens = MaxTokens / 2; - const auto [N, H] = getHash(CB, ORE); - if (!N) { - // Pick the fallback token (ClFallbackToken), which by default is 0, - // meaning it'll fall into the pointer-less bucket. Override by setting - // -alloc-token-fallback if that is the wrong choice. - return H; + if (MDNode *N = getAllocTokenMetadata(CB)) { + MDString *S = cast<MDString>(N->getOperand(0)); + AllocTokenMetadata Metadata{S->getString(), containsPointer(N)}; + if (auto Token = getAllocToken(TokenMode::TypeHashPointerSplit, Metadata, + MaxTokens)) + return *Token; } - uint64_t Hash = H % HalfTokens; // base hash - if (containsPointer(N)) - Hash += HalfTokens; - return Hash; + // Pick the fallback token (ClFallbackToken), which by default is 0, meaning + // it'll fall into the pointer-less bucket. Override by setting + // -alloc-token-fallback if that is the wrong choice. + remarkNoMetadata(CB, ORE); + return ClFallbackToken; } }; @@ -315,6 +305,9 @@ private: FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID, LibFunc OriginalFunc); + /// Lower alloc_token_* intrinsics. 
+ void replaceIntrinsicInst(IntrinsicInst *II, OptimizationRemarkEmitter &ORE); + /// Return the token ID from metadata in the call. uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) { return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode); @@ -336,21 +329,32 @@ bool AllocToken::instrumentFunction(Function &F) { // Do not apply any instrumentation for naked functions. if (F.hasFnAttribute(Attribute::Naked)) return false; - if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) - return false; // Don't touch available_externally functions, their actual body is elsewhere. if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; - // Only instrument functions that have the sanitize_alloc_token attribute. - if (!F.hasFnAttribute(Attribute::SanitizeAllocToken)) - return false; auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls; + SmallVector<IntrinsicInst *, 4> IntrinsicInsts; + + // Only instrument functions that have the sanitize_alloc_token attribute. + const bool InstrumentFunction = + F.hasFnAttribute(Attribute::SanitizeAllocToken) && + !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation); // Collect all allocation calls to avoid iterator invalidation. for (Instruction &I : instructions(F)) { + // Collect all alloc_token_* intrinsics. 
+ if (auto *II = dyn_cast<IntrinsicInst>(&I); + II && II->getIntrinsicID() == Intrinsic::alloc_token_id) { + IntrinsicInsts.emplace_back(II); + continue; + } + + if (!InstrumentFunction) + continue; + auto *CB = dyn_cast<CallBase>(&I); if (!CB) continue; @@ -359,11 +363,21 @@ bool AllocToken::instrumentFunction(Function &F) { } bool Modified = false; - for (auto &[CB, Func] : AllocCalls) - Modified |= replaceAllocationCall(CB, Func, ORE, TLI); - if (Modified) - NumFunctionsInstrumented++; + if (!AllocCalls.empty()) { + for (auto &[CB, Func] : AllocCalls) + Modified |= replaceAllocationCall(CB, Func, ORE, TLI); + if (Modified) + NumFunctionsModified++; + } + + if (!IntrinsicInsts.empty()) { + for (auto *II : IntrinsicInsts) + replaceIntrinsicInst(II, ORE); + Modified = true; + NumFunctionsModified++; + } + return Modified; } @@ -381,7 +395,7 @@ AllocToken::shouldInstrumentCall(const CallBase &CB, if (TLI.getLibFunc(*Callee, Func)) { if (isInstrumentableLibFunc(Func, CB, TLI)) return Func; - } else if (Options.Extended && getAllocTokenMetadata(CB)) { + } else if (Options.Extended && CB.getMetadata(LLVMContext::MD_alloc_token)) { return NotLibFunc; } @@ -528,6 +542,16 @@ FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB, return TokenAlloc; } +void AllocToken::replaceIntrinsicInst(IntrinsicInst *II, + OptimizationRemarkEmitter &ORE) { + assert(II->getIntrinsicID() == Intrinsic::alloc_token_id); + + uint64_t TokenID = getToken(*II, ORE); + Value *V = ConstantInt::get(IntPtrTy, TokenID); + II->replaceAllUsesWith(V); + II->eraseFromParent(); +} + } // namespace AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts) diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp index d18c0d0..80e77e09 100644 --- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp @@ -2020,7 
+2020,6 @@ static void moveFastMathFlags(Function &F, F.removeFnAttr(attr); \ FMF.set##setter(); \ } - MOVE_FLAG("unsafe-fp-math", Fast) MOVE_FLAG("no-infs-fp-math", NoInfs) MOVE_FLAG("no-nans-fp-math", NoNaNs) MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index b4e4dc2..c95c887 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -217,32 +217,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { return Parent->getEnclosingBlockWithPredecessors(); } -bool VPBlockUtils::isHeader(const VPBlockBase *VPB, - const VPDominatorTree &VPDT) { - auto *VPBB = dyn_cast<VPBasicBlock>(VPB); - if (!VPBB) - return false; - - // If VPBB is in a region R, VPBB is a loop header if R is a loop region with - // VPBB as its entry, i.e., free of predecessors. - if (auto *R = VPBB->getParent()) - return !R->isReplicator() && !VPBB->hasPredecessors(); - - // A header dominates its second predecessor (the latch), with the other - // predecessor being the preheader - return VPB->getPredecessors().size() == 2 && - VPDT.dominates(VPB, VPB->getPredecessors()[1]); -} - -bool VPBlockUtils::isLatch(const VPBlockBase *VPB, - const VPDominatorTree &VPDT) { - // A latch has a header as its second successor, with its other successor - // leaving the loop. A preheader OTOH has a header as its first (and only) - // successor. 
- return VPB->getNumSuccessors() == 2 && - VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT); -} - VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { iterator It = begin(); while (It != end() && It->isPhi()) @@ -768,8 +742,12 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) { VPRegionBlock *VPRegionBlock::clone() { const auto &[NewEntry, NewExiting] = cloneFrom(getEntry()); - auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting, - getName(), isReplicator()); + VPlan &Plan = *getPlan(); + VPRegionBlock *NewRegion = + isReplicator() + ? Plan.createReplicateRegion(NewEntry, NewExiting, getName()) + : Plan.createLoopRegion(getName(), NewEntry, NewExiting); + for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry)) Block->setParent(NewRegion); return NewRegion; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8274431..167ba55 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4450,22 +4450,24 @@ public: return VPB; } - /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p - /// IsReplicator is true, the region is a replicate region. The returned block - /// is owned by the VPlan and deleted once the VPlan is destroyed. - VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, - const std::string &Name = "", - bool IsReplicator = false) { - auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator); + /// Create a new loop region with \p Name and entry and exiting blocks set + /// to \p Entry and \p Exiting respectively, if set. The returned block is + /// owned by the VPlan and deleted once the VPlan is destroyed. + VPRegionBlock *createLoopRegion(const std::string &Name = "", + VPBlockBase *Entry = nullptr, + VPBlockBase *Exiting = nullptr) { + auto *VPB = Entry ? 
new VPRegionBlock(Entry, Exiting, Name) + : new VPRegionBlock(Name); CreatedBlocks.push_back(VPB); return VPB; } - /// Create a new loop VPRegionBlock with \p Name and entry and exiting blocks set - /// to nullptr. The returned block is owned by the VPlan and deleted once the - /// VPlan is destroyed. - VPRegionBlock *createVPRegionBlock(const std::string &Name = "") { - auto *VPB = new VPRegionBlock(Name); + /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The + /// returned block is owned by the VPlan and deleted once the VPlan is + /// destroyed. + VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, + const std::string &Name = "") { + auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true); CreatedBlocks.push_back(VPB); return VPB; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 332791a..65688a3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -406,7 +406,7 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { // LatchExitVPB, taking care to preserve the original predecessor & successor // order of blocks. Set region entry and exiting after both HeaderVPB and // LatchVPBB have been disconnected from their predecessors/successors. 
- auto *R = Plan.createVPRegionBlock(); + auto *R = Plan.createLoopRegion(); VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R); VPBlockUtils::disconnectBlocks(LatchVPBB, R); VPBlockUtils::connectBlocks(PreheaderVPBB, R); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 7bf8d83..ff25ef5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -372,7 +372,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, auto *Exiting = Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); VPRegionBlock *Region = - Plan.createVPRegionBlock(Entry, Exiting, RegionName, true); + Plan.createReplicateRegion(Entry, Exiting, RegionName); // Note: first set Entry as region entry and then connect successors starting // from it in order, to propagate the "parent" of each VPBasicBlock. diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 10801c0..32e4b88 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -8,6 +8,7 @@ #include "VPlanUtils.h" #include "VPlanCFG.h" +#include "VPlanDominatorTree.h" #include "VPlanPatternMatch.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -253,3 +254,29 @@ vputils::getRecipesForUncountableExit(VPlan &Plan, return UncountableCondition; } + +bool VPBlockUtils::isHeader(const VPBlockBase *VPB, + const VPDominatorTree &VPDT) { + auto *VPBB = dyn_cast<VPBasicBlock>(VPB); + if (!VPBB) + return false; + + // If VPBB is in a region R, VPBB is a loop header if R is a loop region with + // VPBB as its entry, i.e., free of predecessors. 
+ if (auto *R = VPBB->getParent()) + return !R->isReplicator() && !VPBB->hasPredecessors(); + + // A header dominates its second predecessor (the latch), with the other + // predecessor being the preheader + return VPB->getPredecessors().size() == 2 && + VPDT.dominates(VPB, VPB->getPredecessors()[1]); +} + +bool VPBlockUtils::isLatch(const VPBlockBase *VPB, + const VPDominatorTree &VPDT) { + // A latch has a header as its second successor, with its other successor + // leaving the loop. A preheader OTOH has a header as its first (and only) + // successor. + return VPB->getNumSuccessors() == 2 && + VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT); +} diff --git a/llvm/test/Bitcode/upgrade-branch-protection.ll b/llvm/test/Bitcode/upgrade-branch-protection.ll index 1b33e39..6f60ba5 100644 --- a/llvm/test/Bitcode/upgrade-branch-protection.ll +++ b/llvm/test/Bitcode/upgrade-branch-protection.ll @@ -1,8 +1,11 @@ -;; Test that module flags "branch-target-enforcement" and "sign-return-address" can be upgraded to -;; are upgraded from Error to Min. +;; Test that module flags "branch-target-enforcement" and "sign-return-address" +;; can be upgraded to are upgraded from Error to Min and the value is changed 2 +;; as the module is converted to the semantic. ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s +target triple = "aarch64-unknown-linux-gnu" + !llvm.module.flags = !{!0, !1, !2, !3} !0 = !{i32 1, !"branch-target-enforcement", i32 1} @@ -10,7 +13,7 @@ !2 = !{i32 1, !"sign-return-address-all", i32 1} !3 = !{i32 1, !"sign-return-address-with-bkey", i32 1} -;CHECK: !0 = !{i32 8, !"branch-target-enforcement", i32 1} -;CHECK: !1 = !{i32 8, !"sign-return-address", i32 1} -;CHECK: !2 = !{i32 8, !"sign-return-address-all", i32 1} -;CHECK: !3 = !{i32 8, !"sign-return-address-with-bkey", i32 1}
\ No newline at end of file +;CHECK: !0 = !{i32 8, !"branch-target-enforcement", i32 2} +;CHECK: !1 = !{i32 8, !"sign-return-address", i32 2} +;CHECK: !2 = !{i32 8, !"sign-return-address-all", i32 2} +;CHECK: !3 = !{i32 8, !"sign-return-address-with-bkey", i32 2} diff --git a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll index e17a0a9..54f752e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 -enable-unsafe-fp-math -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 -verify-machineinstrs | FileCheck %s define void @foo_2d(ptr %src) { ; CHECK-LABEL: %entry diff --git a/llvm/test/CodeGen/AArch64/csel-zero-float.ll b/llvm/test/CodeGen/AArch64/csel-zero-float.ll index 6edde13..56a33cc 100644 --- a/llvm/test/CodeGen/AArch64/csel-zero-float.ll +++ b/llvm/test/CodeGen/AArch64/csel-zero-float.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -enable-unsafe-fp-math < %s +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s ; There is no invocation to FileCheck as this ; caused a crash in "Post-RA pseudo instruction expansion" diff --git a/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll index f78fcea..b8dcd6f 100644 --- a/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll +++ b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple aarch64-none-linux-gnu -enable-unsafe-fp-math -mattr=+fullfp16 < %s | FileCheck %s +; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+fullfp16 < %s | FileCheck %s define half @scvtf_f16_2(i32 %state) { ; CHECK-LABEL: scvtf_f16_2: diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir index d9ac9a7..de1bb47 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s # Test that we fold correct element from G_UNMERGE_VALUES into fma diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir index 52b1beb..91f2f6f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX10 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-unsafe-fp-math -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX11 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX10 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX11 --- name: fract_f64_neg diff --git a/llvm/test/CodeGen/AMDGPU/add-max.ll b/llvm/test/CodeGen/AMDGPU/add-max.ll index 00c6656..b3a7057 100644 --- a/llvm/test/CodeGen/AMDGPU/add-max.ll +++ b/llvm/test/CodeGen/AMDGPU/add-max.ll @@ -5,7 +5,7 
@@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_max_u32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 %c) @@ -16,7 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) { define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) { ; GCN-LABEL: add_max_u32_svv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1 +; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 %c) @@ -27,7 +27,7 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) { define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) { ; SDAG-LABEL: add_max_u32_ssv: ; SDAG: ; %bb.0: -; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0 +; SDAG-NEXT: v_add_max_u32 v0, s0, s1, v0 ; SDAG-NEXT: ; return to shader part epilog ; ; GISEL-LABEL: add_max_u32_ssv: @@ -59,7 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) { ; GCN-LABEL: add_max_u32_vsi: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4 +; GCN-NEXT: v_add_max_u32 v0, v0, s0, 4 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 4) @@ -70,7 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) { define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) { ; GCN-LABEL: add_max_u32_svl: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64 +; GCN-NEXT: v_add_max_u32 v0, s0, v0, 0x64 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 100) @@ -81,7 +81,7 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) { 
define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) { ; SDAG-LABEL: add_max_u32_slv: ; SDAG: ; %bb.0: -; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0 +; SDAG-NEXT: v_add_max_u32 v0, 0x64, s0, v0 ; SDAG-NEXT: ; return to shader part epilog ; ; GISEL-LABEL: add_max_u32_slv: @@ -99,7 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) { define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_max_i32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.smax.i32(i32 %add, i32 %c) @@ -110,7 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) { define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_min_u32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umin.i32(i32 %add, i32 %c) @@ -121,7 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) { define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_min_i32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.smin.i32(i32 %add, i32 %c) diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 7ee0015f..711d57b 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -39137,7 +39137,7 @@ define bfloat @v_sitofp_i64_to_bf16(i64 %x) { ; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v2 ; GFX1250-NEXT: v_add_nc_u32_e32 v2, 32, v2 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-NEXT: v_add_min_u32_e64 v2, v3, -1, v2 +; GFX1250-NEXT: v_add_min_u32 
v2, v3, -1, v2 ; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v2, v[0:1] ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0 @@ -39487,8 +39487,8 @@ define <2 x bfloat> @v_sitofp_v2i64_to_v2bf16(<2 x i64> %x) { ; GFX1250-NEXT: v_dual_ashrrev_i32 v5, 31, v5 :: v_dual_ashrrev_i32 v4, 31, v4 ; GFX1250-NEXT: v_dual_add_nc_u32 v5, 32, v5 :: v_dual_add_nc_u32 v4, 32, v4 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_add_min_u32_e64 v5, v7, -1, v5 -; GFX1250-NEXT: v_add_min_u32_e64 v4, v6, -1, v4 +; GFX1250-NEXT: v_add_min_u32 v5, v7, -1, v5 +; GFX1250-NEXT: v_add_min_u32 v4, v6, -1, v4 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v5, v[0:1] ; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v4, v[2:3] @@ -39979,9 +39979,9 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250TRUE16-NEXT: v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v8, 31, v8 -; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v7, v10, -1, v7 +; GFX1250TRUE16-NEXT: v_add_min_u32 v7, v10, -1, v7 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v6, v9, -1, v6 +; GFX1250TRUE16-NEXT: v_add_min_u32 v6, v9, -1, v6 ; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3] ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5] @@ -39991,7 +39991,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4 ; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | 
instid1(VALU_DEP_3) -; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v8, v11, -1, v8 +; GFX1250TRUE16-NEXT: v_add_min_u32 v8, v11, -1, v8 ; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2 @@ -40027,8 +40027,8 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7 ; GFX1250FAKE16-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v6, v10, -1, v6 -; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v7, v11, -1, v7 +; GFX1250FAKE16-NEXT: v_add_min_u32 v6, v10, -1, v6 +; GFX1250FAKE16-NEXT: v_add_min_u32 v7, v11, -1, v7 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3] ; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1] @@ -40038,7 +40038,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250FAKE16-NEXT: v_or_b32_e32 v2, v3, v2 -; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v8, v9, -1, v8 +; GFX1250FAKE16-NEXT: v_add_min_u32 v8, v9, -1, v8 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54 ; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2 @@ -40656,18 +40656,18 @@ define <4 x bfloat> @v_sitofp_v4i64_to_v4bf16(<4 x i64> %x) { ; GFX1250-NEXT: v_dual_add_nc_u32 v9, 32, v9 :: v_dual_add_nc_u32 v8, 32, v8 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | 
instid1(VALU_DEP_2) ; GFX1250-NEXT: v_dual_ashrrev_i32 v10, 31, v10 :: v_dual_bitop2_b32 v11, v0, v1 bitop3:0x14 -; GFX1250-NEXT: v_add_min_u32_e64 v9, v13, -1, v9 +; GFX1250-NEXT: v_add_min_u32 v9, v13, -1, v9 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_add_min_u32_e64 v8, v12, -1, v8 +; GFX1250-NEXT: v_add_min_u32 v8, v12, -1, v8 ; GFX1250-NEXT: v_dual_ashrrev_i32 v11, 31, v11 :: v_dual_add_nc_u32 v10, 32, v10 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v9, v[4:5] ; GFX1250-NEXT: v_lshlrev_b64_e32 v[6:7], v8, v[6:7] ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1250-NEXT: v_add_nc_u32_e32 v11, 32, v11 -; GFX1250-NEXT: v_add_min_u32_e64 v10, v14, -1, v10 +; GFX1250-NEXT: v_add_min_u32 v10, v14, -1, v10 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_add_min_u32_e64 v11, v15, -1, v11 +; GFX1250-NEXT: v_add_min_u32 v11, v15, -1, v11 ; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v10, v[2:3] ; GFX1250-NEXT: v_min_u32_e32 v6, 1, v6 ; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4 diff --git a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll index 13206ad..f45070c 100644 --- a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s -; RUN: llc -mtriple=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s +; RUN: llc -mtriple=amdgcn 
-mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s +; RUN: llc -mtriple=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s -; RUN: llc -mtriple=amdgcn -mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s -; RUN: llc -mtriple=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s +; RUN: llc -mtriple=amdgcn -mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s +; RUN: llc -mtriple=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s ; FIXME: This should also fold when fma is actually fast if an FMA ; exists in the original program. 
diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll index d41e2c6..8df7564 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX89,VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX89,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX11-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX89,VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX89,GFX9 %s 
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @fpext_f16_to_f32( ; SI-LABEL: fpext_f16_to_f32: diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll index a43292d..a043d53 100644 --- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false 
-mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @fptosi_f16_to_i16( diff --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll index 96cb621..af1ab37 100644 --- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @fptoui_f16_to_i16( diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll index 5d31177..f1165491 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll +++ 
b/llvm/test/CodeGen/AMDGPU/fptrunc.ll @@ -2,14 +2,14 @@ ; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefixes=SI %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-FAKE16 %s +; RUN: llc -mtriple=amdgcn 
-mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-FAKE16 %s diff --git a/llvm/test/CodeGen/AMDGPU/fract.f64.ll b/llvm/test/CodeGen/AMDGPU/fract.f64.ll index f09c1c6..cc2e78d 100644 --- a/llvm/test/CodeGen/AMDGPU/fract.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fract.f64.ll @@ -2,8 +2,8 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -enable-unsafe-fp-math < %s | FileCheck --check-prefixes=GCN,SI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,SI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s declare double @llvm.fabs.f64(double) #0 declare double @llvm.floor.f64(double) #0 diff --git a/llvm/test/CodeGen/AMDGPU/fract.ll b/llvm/test/CodeGen/AMDGPU/fract.ll index 8ef0fcf..723fd93 100644 --- a/llvm/test/CodeGen/AMDGPU/fract.ll +++ b/llvm/test/CodeGen/AMDGPU/fract.ll @@ -1,8 +1,8 
@@ ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck --check-prefix=GCN %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GCN %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -enable-unsafe-fp-math < %s | FileCheck --check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefix=GCN %s declare float @llvm.fabs.f32(float) #0 declare float @llvm.floor.f32(float) #0 diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll index 4ae0ba0..4e93eca 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 -; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=GCN,UNSAFE %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 %s | FileCheck --check-prefixes=GCN,UNSAFE %s ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck --check-prefixes=GCN,NONANS %s ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck --check-prefixes=GCN,NOINFS %s diff --git a/llvm/test/CodeGen/AMDGPU/prevent-fmul-hoist-ir.ll b/llvm/test/CodeGen/AMDGPU/prevent-fmul-hoist-ir.ll index ef3e04c..6ce614b 100644 --- a/llvm/test/CodeGen/AMDGPU/prevent-fmul-hoist-ir.ll +++ 
b/llvm/test/CodeGen/AMDGPU/prevent-fmul-hoist-ir.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -passes='simplifycfg<hoist-common-insts>' -mtriple=amdgcn-- --fp-contract=fast -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX -check-prefix=FP-CONTRACT-FAST %s -; RUN: opt -S -passes='simplifycfg<hoist-common-insts>' -mtriple=amdgcn-- --fp-contract=off --enable-unsafe-fp-math -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX -check-prefix=UNSAFE-FP-MATH %s +; RUN: opt -S -passes='simplifycfg<hoist-common-insts>' -mtriple=amdgcn-- --fp-contract=off -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX -check-prefix=UNSAFE-FP-MATH %s ; RUN: opt -S -passes='simplifycfg<hoist-common-insts>' -mtriple=amdgcn-- --fp-contract=off -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX -check-prefix=NO-UNSAFE-FP-MATH %s define double @is_profitable_f64_contract(ptr dereferenceable(8) %ptr_x, ptr dereferenceable(8) %ptr_y, ptr dereferenceable(8) %ptr_a) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll index 09596e9..7ddd90e 100644 --- a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | 
FileCheck -check-prefixes=GFX11-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @sitofp_i16_to_f16( ; SI-LABEL: sitofp_i16_to_f16: diff --git a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll index 9bcba6c..2d7ce10 100644 --- a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s +; RUN: llc 
-amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s define amdgpu_kernel void @uitofp_i16_to_f16( ; SI-LABEL: uitofp_i16_to_f16: diff --git a/llvm/test/CodeGen/ARM/llround-conv.ll b/llvm/test/CodeGen/ARM/llround-conv.ll index f734db8..20fe272 100644 --- a/llvm/test/CodeGen/ARM/llround-conv.ll +++ b/llvm/test/CodeGen/ARM/llround-conv.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8 ; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 define i64 @testmsxh_builtin(half %x) { @@ -22,6 +23,14 @@ define i64 @testmsxh_builtin(half %x) { ; CHECK-NOFP16-NEXT: bl llroundf ; CHECK-NOFP16-NEXT: pop {r11, pc} ; +; CHECK-FPv8-LABEL: testmsxh_builtin: +; CHECK-FPv8: @ %bb.0: @ %entry +; CHECK-FPv8-NEXT: .save {r11, lr} +; CHECK-FPv8-NEXT: push {r11, lr} +; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FPv8-NEXT: bl llroundf +; CHECK-FPv8-NEXT: pop {r11, pc} +; ; CHECK-FP16-LABEL: testmsxh_builtin: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r11, lr} diff --git a/llvm/test/CodeGen/ARM/lround-conv.ll b/llvm/test/CodeGen/ARM/lround-conv.ll index 03f7a0d..7466bcb 100644 --- a/llvm/test/CodeGen/ARM/lround-conv.ll +++ b/llvm/test/CodeGen/ARM/lround-conv.ll @@ -4,11 +4,39 @@ ; RUN: llc < %s 
-mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8 ; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 -;define i32 @testmswh_builtin(half %x) { -;entry: -; %0 = tail call i32 @llvm.lround.i32.f16(half %x) -; ret i32 %0 -;} +define i32 @testmswh_builtin(half %x) { +; CHECK-SOFT-LABEL: testmswh_builtin: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: .save {r11, lr} +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: bl __aeabi_h2f +; CHECK-SOFT-NEXT: pop {r11, lr} +; CHECK-SOFT-NEXT: b lroundf +; +; CHECK-NOFP16-LABEL: testmswh_builtin: +; CHECK-NOFP16: @ %bb.0: @ %entry +; CHECK-NOFP16-NEXT: .save {r11, lr} +; CHECK-NOFP16-NEXT: push {r11, lr} +; CHECK-NOFP16-NEXT: vmov r0, s0 +; CHECK-NOFP16-NEXT: bl __aeabi_h2f +; CHECK-NOFP16-NEXT: vmov s0, r0 +; CHECK-NOFP16-NEXT: pop {r11, lr} +; CHECK-NOFP16-NEXT: b lroundf +; +; CHECK-FPv8-LABEL: testmswh_builtin: +; CHECK-FPv8: @ %bb.0: @ %entry +; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FPv8-NEXT: b lroundf +; +; CHECK-FP16-LABEL: testmswh_builtin: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: vcvta.s32.f16 s0, s0 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: bx lr +entry: + %0 = tail call i32 @llvm.lround.i32.f16(half %x) + ret i32 %0 +} define i32 @testmsws_builtin(float %x) { ; CHECK-LABEL: testmsws_builtin: @@ -40,8 +68,3 @@ entry: ret i32 %0 } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK-FP16: {{.*}} -; CHECK-FPv8: {{.*}} -; CHECK-NOFP16: {{.*}} -; CHECK-SOFT: {{.*}} diff --git a/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll new file mode 100644 index 0000000..9190d03 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -dxil-prepare < %s | FileCheck %s + +; Ensures that dxil-prepare will remove the llvm.errno.tbaa metadata + +target triple = "dxil-unknown-shadermodel6.0-compute" + +define void @main() { +entry: + ret void +} + +; CHECK-NOT: !llvm.errno.tbaa +; CHECK-NOT: {{^!}} + +!llvm.errno.tbaa = !{!0} + +!0 = !{!1, !1, i64 0} +!1 = !{!"omnipotent char", !2} +!2 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll new file mode 100644 index 0000000..f7e5cdb --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll @@ -0,0 +1,33 @@ +; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s +; CHECK-DAG: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = combine(##.LCPI0_0,#-1) +; CHECK-DAG: [[VREG1:v([0-9]+)]] = vmem(r[[REGH]]+#0) +; CHECK-DAG: [[REG1:(r[0-9]+)]] = memw(r{{[0-9]+}}+#4) +; CHECK-DAG: [[VREG2:v([0-9]+)]] = vsplat([[REG1]]) +; CHECK-DAG: [[REG2:(r[0-9]+)]] = memw(r{{[0-9]+}}+#0) +; CHECK-DAG: [[VREG3:v([0-9]+)]] = vsplat([[REG2]]) +; CHECK-DAG: [[VREG4:v([0-9]+)]] = vand([[VREG2]],[[VREG1]]) +; CHECK-DAG: [[VREG5:v([0-9]+)]] = vand([[VREG3]],[[VREG1]]) +; CHECK-DAG: [[QREG:q[0-9]+]] = vand([[VREG4]],r{{[0-9]+}}) +; CHECK-DAG: [[VREG6:v([0-9]+)]] = vand([[QREG]],r{{[0-9]+}}) +; CHECK-DAG: [[QREG1:q[0-9]+]] = vand([[VREG5]],r{{[0-9]+}}) +; CHECK-DAG: [[VREG7:v([0-9]+)]] = vand([[QREG1]],r{{[0-9]+}}) +; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK-DAG: [[VREG8:v([0-9]+)]] = 
vror(v{{[0-9]+}},r{{[0-9]+}}) +; CHECK-DAG: [[VREG9:v([0-9]+)]] = vor([[VREG8]],v{{[0-9]+}}) +; CHECK-DAG: q{{[0-9]+}} = vand([[VREG9]],r{{[0-9]+}}) +define void @bitcast_i64_to_v64i1_full(ptr %in, ptr %out) { +entry: + %load = load i64, ptr %in, align 4 + %bitcast = bitcast i64 %load to <64 x i1> + %e0 = extractelement <64 x i1> %bitcast, i32 0 + %e1 = extractelement <64 x i1> %bitcast, i32 1 + %z0 = zext i1 %e0 to i8 + %z1 = zext i1 %e1 to i8 + %ptr0 = getelementptr i8, ptr %out, i32 0 + %ptr1 = getelementptr i8, ptr %out, i32 1 + store i8 %z0, ptr %ptr0, align 1 + store i8 %z1, ptr %ptr1, align 1 + ret void +} + diff --git a/llvm/test/CodeGen/Hexagon/swp-phi.ll b/llvm/test/CodeGen/Hexagon/swp-phi.ll index 9b0e126..6ce2481 100644 --- a/llvm/test/CodeGen/Hexagon/swp-phi.ll +++ b/llvm/test/CodeGen/Hexagon/swp-phi.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=hexagon -enable-unsafe-fp-math -enable-pipeliner \ +; RUN: llc -mtriple=hexagon -enable-pipeliner \ ; RUN: -pipeliner-prune-deps=false -stats -o /dev/null < %s ; REQUIRES: asserts diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json new file mode 100644 index 0000000..5de715b --- /dev/null +++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json @@ -0,0 +1,22 @@ +{ + "entities": { + "KILL": [0.1, 0.2, 0.3], + "MOV": [0.4, 0.5, 0.6], + "LEA": [0.7, 0.8, 0.9], + "RET": [1.0, 1.1, 1.2], + "ADD": [1.3, 1.4, 1.5], + "SUB": [1.6, 1.7, 1.8], + "IMUL": [1.9, 2.0, 2.1], + "AND": [2.2, 2.3, 2.4], + "OR": [2.5, 2.6, 2.7], + "XOR": [2.8, 2.9, 3.0], + "CMP": [3.1, 3.2, 3.3], + "TEST": [3.4, 3.5, 3.6], + "JMP": [3.7, 3.8, 3.9], + "CALL": [4.0, 4.1, 4.2], + "PUSH": [4.3, 4.4, 4.5], + "POP": [4.6, 4.7, 4.8], + "NOP": [4.9, 5.0, 5.1], + "COPY": [5.2, 5.3, 5.4] + } +}
\ No newline at end of file diff --git a/llvm/test/CodeGen/MIR2Vec/if-else.mir b/llvm/test/CodeGen/MIR2Vec/if-else.mir new file mode 100644 index 0000000..5734a23 --- /dev/null +++ b/llvm/test/CodeGen/MIR2Vec/if-else.mir @@ -0,0 +1,144 @@ +# REQUIRES: x86-registered-target +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=none -print-mir2vec -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_3D_vocab.json %s -o /dev/null 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + + define dso_local i32 @abc(i32 noundef %a, i32 noundef %b) { + entry: + %retval = alloca i32, align 4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4 + store i32 %b, ptr %b.addr, align 4 + %0 = load i32, ptr %a.addr, align 4 + %1 = load i32, ptr %b.addr, align 4 + %cmp = icmp sgt i32 %0, %1 + br i1 %cmp, label %if.then, label %if.else + + if.then: ; preds = %entry + %2 = load i32, ptr %b.addr, align 4 + store i32 %2, ptr %retval, align 4 + br label %return + + if.else: ; preds = %entry + %3 = load i32, ptr %a.addr, align 4 + store i32 %3, ptr %retval, align 4 + br label %return + + return: ; preds = %if.else, %if.then + %4 = load i32, ptr %retval, align 4 + ret i32 %4 + } +... 
+--- +name: abc +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHContTarget: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: true +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: gr32, preferred-register: '', flags: [ ] } + - { id: 1, class: gr32, preferred-register: '', flags: [ ] } + - { id: 2, class: gr32, preferred-register: '', flags: [ ] } + - { id: 3, class: gr32, preferred-register: '', flags: [ ] } + - { id: 4, class: gr32, preferred-register: '', flags: [ ] } + - { id: 5, class: gr32, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$edi', virtual-reg: '%0' } + - { reg: '$esi', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 +fixedStack: [] +stack: + - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: a.addr, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: b.addr, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, 
callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + amxProgModel: None +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $edi, $esi + + %1:gr32 = COPY $esi + %0:gr32 = COPY $edi + MOV32mr %stack.1.a.addr, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.a.addr) + MOV32mr %stack.2.b.addr, 1, $noreg, 0, $noreg, %1 :: (store (s32) into %ir.b.addr) + %2:gr32 = SUB32rr %0, %1, implicit-def $eflags + JCC_1 %bb.2, 14, implicit $eflags + JMP_1 %bb.1 + + bb.1.if.then: + successors: %bb.3(0x80000000) + + %4:gr32 = MOV32rm %stack.2.b.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.b.addr) + MOV32mr %stack.0.retval, 1, $noreg, 0, $noreg, killed %4 :: (store (s32) into %ir.retval) + JMP_1 %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + + %3:gr32 = MOV32rm %stack.1.a.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.a.addr) + MOV32mr %stack.0.retval, 1, $noreg, 0, $noreg, killed %3 :: (store (s32) into %ir.retval) + + bb.3.return: + %5:gr32 = MOV32rm %stack.0.retval, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.retval) + $eax = COPY %5 + RET 0, $eax +... + +# CHECK: Machine basic block vectors: +# CHECK-NEXT: Machine basic block: abc:entry: +# CHECK-NEXT: [ 16.50 17.10 17.70 ] +# CHECK-NEXT: Machine basic block: abc:if.then: +# CHECK-NEXT: [ 4.50 4.80 5.10 ] +# CHECK-NEXT: Machine basic block: abc:if.else: +# CHECK-NEXT: [ 0.80 1.00 1.20 ] +# CHECK-NEXT: Machine basic block: abc:return: +# CHECK-NEXT: [ 6.60 6.90 7.20 ]
\ No newline at end of file diff --git a/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir b/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir new file mode 100644 index 0000000..338cb63 --- /dev/null +++ b/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir @@ -0,0 +1,76 @@ +# REQUIRES: x86-registered-target +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=none -print-mir2vec -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_3D_vocab.json %s -o /dev/null 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + + define dso_local noundef i32 @add_function(i32 noundef %a, i32 noundef %b) { + entry: + %sum = add nsw i32 %a, %b + %result = mul nsw i32 %sum, 2 + ret i32 %result + } + + define dso_local void @simple_function() { + entry: + ret void + } +... +--- +name: add_function +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr32 } +liveins: + - { reg: '$edi', virtual-reg: '%0' } + - { reg: '$esi', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $edi, $esi + + %1:gr32 = COPY $esi + %0:gr32 = COPY $edi + %2:gr32 = nsw ADD32rr %0, %1, implicit-def dead $eflags + %3:gr32 = ADD32rr %2, %2, implicit-def dead $eflags + $eax = COPY %3 + RET 0, $eax + +--- +name: simple_function +alignment: 16 +tracksRegLiveness: true +body: | + bb.0.entry: + RET 0 + +# CHECK: MIR2Vec embeddings for machine function add_function: +# CHECK: Function vector: [ 19.20 19.80 20.40 ] +# CHECK-NEXT: Machine basic block vectors: +# CHECK-NEXT: Machine basic block: add_function:entry: +# CHECK-NEXT: [ 19.20 19.80 20.40 ] +# CHECK-NEXT: Machine instruction vectors: +# CHECK-NEXT: Machine instruction: %1:gr32 = COPY $esi +# CHECK-NEXT: [ 5.20 5.30 5.40 ] +# CHECK-NEXT: Machine instruction: %0:gr32 = COPY $edi +# CHECK-NEXT: [ 5.20 5.30 5.40 ] +# CHECK-NEXT: Machine instruction: %2:gr32 = nsw ADD32rr 
%0:gr32(tied-def 0), %1:gr32, implicit-def dead $eflags +# CHECK-NEXT: [ 1.30 1.40 1.50 ] +# CHECK-NEXT: Machine instruction: %3:gr32 = ADD32rr %2:gr32(tied-def 0), %2:gr32, implicit-def dead $eflags +# CHECK-NEXT: [ 1.30 1.40 1.50 ] +# CHECK-NEXT: Machine instruction: $eax = COPY %3:gr32 +# CHECK-NEXT: [ 5.20 5.30 5.40 ] +# CHECK-NEXT: Machine instruction: RET 0, $eax +# CHECK-NEXT: [ 1.00 1.10 1.20 ] + +# CHECK: MIR2Vec embeddings for machine function simple_function: +# CHECK-NEXT:Function vector: [ 1.00 1.10 1.20 ] +# CHECK-NEXT: Machine basic block vectors: +# CHECK-NEXT: Machine basic block: simple_function:entry: +# CHECK-NEXT: [ 1.00 1.10 1.20 ] +# CHECK-NEXT: Machine instruction vectors: +# CHECK-NEXT: Machine instruction: RET 0 +# CHECK-NEXT: [ 1.00 1.10 1.20 ]
\ No newline at end of file diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll index 80b4048..c6554bc 100644 --- a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll +++ b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll @@ -1,8 +1,8 @@ -; REQUIRES: x86_64-linux -; RUN: llc -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID -; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM -; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES -; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS +; REQUIRES: x86-registered-target +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS define dso_local void @test() { entry: diff --git a/llvm/test/CodeGen/NVPTX/fma-assoc.ll b/llvm/test/CodeGen/NVPTX/fma-assoc.ll index 6693c90..db0eae7 100644 --- a/llvm/test/CodeGen/NVPTX/fma-assoc.ll +++ b/llvm/test/CodeGen/NVPTX/fma-assoc.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | %ptxas-verify %} define ptx_device float @t1_f32(float %x, float %y, float %z, ; CHECK-UNSAFE-LABEL: t1_f32( diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index cad684e..baa127e 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -2,8 +2,8 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF -; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -fp-contract=fast -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG -; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -fp-contract=fast -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL +; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -fp-contract=fast -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG +; RUN: llc < %s -mtriple=powerpc64le -fp-contract=fast -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL ; Test FP transforms using instruction/node-level fast-math-flags. 
; We're also checking debug output to verify that FMF is propagated to the newly created nodes. diff --git a/llvm/test/CodeGen/PowerPC/scalar-equal.ll b/llvm/test/CodeGen/PowerPC/scalar-equal.ll index 1832475..c0b11b4 100644 --- a/llvm/test/CodeGen/PowerPC/scalar-equal.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-equal.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=FAST-P8 -; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ diff --git a/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll index ca9baceb..5915bd3 100644 --- a/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-min-max-p10.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s diff --git a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll index fd0b494..881d1f4 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=FAST-P8 -; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ ; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ diff --git a/llvm/test/CodeGen/RISCV/atomic-fence.ll b/llvm/test/CodeGen/RISCV/atomic-fence.ll index 7103345..77148f6 100644 --- a/llvm/test/CodeGen/RISCV/atomic-fence.ll +++ b/llvm/test/CodeGen/RISCV/atomic-fence.ll @@ -1,12 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll index 
7e3abc7..c6234de 100644 --- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll +++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll @@ -1,12 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ @@ -44,6 +48,11 @@ define i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i8_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lb a0, 0(a0) @@ -59,6 +68,11 @@ define i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i8_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) @@ -78,6 +92,11 @@ define i8 @atomic_load_i8_monotonic(ptr %a) nounwind { 
; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lb a0, 0(a0) @@ -93,6 +112,11 @@ define i8 @atomic_load_i8_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) @@ -112,6 +136,12 @@ define i8 @atomic_load_i8_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i8_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lb a0, 0(a0) @@ -133,6 +163,12 @@ define i8 @atomic_load_i8_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i8_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lb a0, 0(a0) @@ -200,6 +236,13 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw @@ -223,6 +266,13 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: 
atomic_load_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -286,6 +336,11 @@ define i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i16_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lh a0, 0(a0) @@ -301,6 +356,11 @@ define i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i16_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) @@ -320,6 +380,11 @@ define i16 @atomic_load_i16_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i16_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lh a0, 0(a0) @@ -335,6 +400,11 @@ define i16 @atomic_load_i16_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) @@ -354,6 +424,12 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; 
RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i16_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lh a0, 0(a0) @@ -375,6 +451,12 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i16_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lh a0, 0(a0) @@ -442,6 +524,13 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw @@ -465,6 +554,13 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -528,6 +624,11 @@ define i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i32_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lw a0, 0(a0) @@ -543,6 +644,11 @@ define i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; 
RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i32_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) @@ -562,6 +668,11 @@ define i32 @atomic_load_i32_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lw a0, 0(a0) @@ -577,6 +688,11 @@ define i32 @atomic_load_i32_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) @@ -596,6 +712,12 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lw a0, 0(a0) @@ -617,6 +739,12 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lw a0, 0(a0) @@ -684,6 +812,13 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: 
atomic_load_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw @@ -707,6 +842,13 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -770,6 +912,16 @@ define i64 @atomic_load_i64_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 0 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -790,6 +942,11 @@ define i64 @atomic_load_i64_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i64_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: ld a0, 0(a0) @@ -809,6 +966,16 @@ define i64 @atomic_load_i64_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 0 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: 
atomic_load_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -829,6 +996,11 @@ define i64 @atomic_load_i64_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: ld a0, 0(a0) @@ -848,6 +1020,16 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 2 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -868,6 +1050,12 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: ld a0, 0(a0) @@ -914,6 +1102,16 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 5 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: 
addi sp, sp, -16 @@ -934,6 +1132,13 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -979,6 +1184,11 @@ define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i8_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sb a1, 0(a0) @@ -994,6 +1204,11 @@ define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i8_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sb a1, 0(a0) @@ -1013,6 +1228,11 @@ define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sb a1, 0(a0) @@ -1028,6 +1248,11 @@ define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sb a1, 0(a0) @@ -1047,6 +1272,12 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) 
nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i8_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1068,6 +1299,12 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i8_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1135,6 +1372,13 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i8_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1157,6 +1401,13 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i8_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1219,6 +1470,11 @@ define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i16_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sh a1, 0(a0) @@ 
-1234,6 +1490,11 @@ define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i16_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sh a1, 0(a0) @@ -1253,6 +1514,11 @@ define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i16_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sh a1, 0(a0) @@ -1268,6 +1534,11 @@ define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sh a1, 0(a0) @@ -1287,6 +1558,12 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i16_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1308,6 +1585,12 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i16_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1375,6 +1658,13 @@ define void @atomic_store_i16_seq_cst(ptr 
%a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i16_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1397,6 +1687,13 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i16_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1459,6 +1756,11 @@ define void @atomic_store_i32_unordered(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i32_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sw a1, 0(a0) @@ -1474,6 +1776,11 @@ define void @atomic_store_i32_unordered(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i32_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sw a1, 0(a0) @@ -1493,6 +1800,11 @@ define void @atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sw a1, 0(a0) @@ -1508,6 +1820,11 @@ define void 
@atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sw a1, 0(a0) @@ -1527,6 +1844,12 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1548,6 +1871,12 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1615,6 +1944,13 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1637,6 +1973,13 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i32_seq_cst: ; 
RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1699,6 +2042,16 @@ define void @atomic_store_i64_unordered(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1719,6 +2072,11 @@ define void @atomic_store_i64_unordered(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i64_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sd a1, 0(a0) @@ -1738,6 +2096,16 @@ define void @atomic_store_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1758,6 +2126,11 @@ define void @atomic_store_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i64_monotonic: ; RV64IA: # %bb.0: ; 
RV64IA-NEXT: sd a1, 0(a0) @@ -1777,6 +2150,16 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1797,6 +2180,12 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1843,6 +2232,16 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1863,6 +2262,13 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; 
RV64IA-WMO-LABEL: atomic_store_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll index 4dafd6a..d5238ab 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll @@ -3,10 +3,14 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { ; RV32I-LABEL: atomicrmw_sub_i32_constant: @@ -26,6 +30,18 @@ define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { ; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_constant: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a1, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB0_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_constant: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -42,6 +58,18 @@ define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { ; RV64IA-NEXT: li a1, -1 ; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_constant: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: li a2, 1 +; 
RV64I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB0_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i32 1 seq_cst ret i32 %1 } @@ -71,6 +99,18 @@ define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_constant: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 1 +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: li a2, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i64_constant: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -87,6 +127,18 @@ define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind { ; RV64IA-NEXT: li a1, -1 ; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_constant: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB1_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i64 1 seq_cst ret i64 %1 } @@ -109,6 +161,18 @@ define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind { ; RV32IA-NEXT: amoadd.w.aqrl a0, a2, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_neg: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sub a2, a1, a2 +; RV32I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: 
Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a1, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB2_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_neg: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -125,6 +189,18 @@ define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind { ; RV64IA-NEXT: sub a2, a2, a1 ; RV64IA-NEXT: amoadd.w.aqrl a0, a2, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_neg: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: subw a2, a1, a2 +; RV64I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB2_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %b = sub i32 %x, %y %1 = atomicrmw sub ptr %a, i32 %b seq_cst ret i32 %1 @@ -159,6 +235,20 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_neg: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sltu a5, a1, a3 +; RV32I-ZALRSC-NEXT: sub a2, a2, a4 +; RV32I-ZALRSC-NEXT: sub a2, a2, a5 +; RV32I-ZALRSC-NEXT: sub a1, a1, a3 +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i64_neg: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -175,6 +265,18 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { ; RV64IA-NEXT: sub a2, a2, a1 ; RV64IA-NEXT: amoadd.d.aqrl a0, a2, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: 
atomicrmw_sub_i64_neg: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sub a2, a1, a2 +; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB3_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %b = sub i64 %x, %y %1 = atomicrmw sub ptr %a, i64 %b seq_cst ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 1213256..26feb83 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -1,12 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS,RV32IA-WMO,RV32IA-WMO-NOZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS,RV32IA-TSO,RV32IA-TSO-NOZACAS %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS,RV64IA-WMO,RV64IA-WMO-NOZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ @@ -50,6 +54,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; 
RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB0_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -80,6 +104,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB0_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -174,6 +218,26 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; 
RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB1_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -224,6 +288,26 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB1_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -378,6 +462,26 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 
3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB2_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -428,6 +532,26 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB2_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -582,6 +706,26 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 
255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -632,6 +776,26 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -786,6 +950,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: 
zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -816,6 +1000,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -914,6 +1118,22 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not 
a2, a2 +; RV32I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB5_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -936,6 +1156,22 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB5_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -1004,6 +1240,22 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; 
RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1037,6 +1289,22 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1138,6 +1406,22 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB7_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1171,6 +1455,22 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release: +; 
RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB7_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1272,6 +1572,22 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1305,6 +1621,22 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; 
RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1406,6 +1738,22 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1439,6 +1787,22 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; 
RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1540,6 +1904,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB10_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -1561,6 +1940,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB10_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -1630,6 +2024,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB11_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1661,6 +2070,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB11_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1760,6 +2184,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB12_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: 
ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1791,6 +2230,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB12_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1890,6 +2344,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB13_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1921,6 +2390,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: +; 
RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB13_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -2020,6 +2504,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB14_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -2051,6 +2550,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl 
a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB14_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -2149,6 +2663,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -2179,6 +2713,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; 
RV64I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -2273,6 +2827,26 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB16_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2323,6 +2897,26 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, 
.LBB16_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2477,6 +3071,26 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB17_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2527,6 +3141,26 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB17_1 +; 
RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2681,6 +3315,26 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2731,6 +3385,26 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; 
RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2885,6 +3559,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB19_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -2915,6 +3609,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB19_1 +; RV64I-ZALRSC-NEXT: # 
%bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3009,6 +3723,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB20_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3039,6 +3773,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB20_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, 
a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3137,6 +3891,26 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB21_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3187,6 +3961,26 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB21_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: 
ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3345,6 +4139,26 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB22_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3395,6 +4209,26 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB22_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; 
RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3553,6 +4387,26 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3603,6 +4457,26 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; 
RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3761,6 +4635,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3791,6 +4685,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: 
atomicrmw_sub_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3889,6 +4803,25 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB25_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3913,6 +4846,25 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB25_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3989,6 +4941,25 @@ define 
i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB26_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4027,6 +4998,25 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB26_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4145,6 +5135,25 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; 
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB27_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4183,6 +5192,25 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB27_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4301,6 +5329,25 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi 
a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB28_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4339,6 +5386,25 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB28_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4457,6 +5523,25 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; 
RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB29_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4495,6 +5580,25 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB29_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4613,6 +5717,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; 
RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB30_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -4644,6 +5769,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB30_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -4865,6 +6011,27 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; 
RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB31_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4917,6 +6084,27 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB31_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5201,6 +6389,27 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; 
RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB32_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5253,6 +6462,27 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB32_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5537,6 +6767,27 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi 
a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB33_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5589,6 +6840,27 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB33_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5873,6 +7145,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_nand_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB34_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -5904,6 +7197,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB34_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -6129,6 +7443,21 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; 
RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB35_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -6149,6 +7478,21 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB35_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -6213,6 +7557,21 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w 
a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB36_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6243,6 +7602,21 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB36_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6337,6 +7711,21 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB37_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6367,6 +7756,21 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 
; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB37_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6461,6 +7865,21 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB38_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6491,6 +7910,21 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; 
RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB38_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6585,6 +8019,21 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB39_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6615,6 +8064,21 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB39_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6709,6 +8173,21 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) 
nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB40_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -6729,6 +8208,21 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB40_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -6793,6 +8287,21 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; 
RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB41_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6823,6 +8332,21 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB41_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6917,6 +8441,21 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB42_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6947,6 +8486,21 @@ define i8 
@atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB42_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7041,6 +8595,21 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB43_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7071,6 +8640,21 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop 
Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB43_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7165,6 +8749,21 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB44_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7195,6 +8794,21 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB44_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; 
RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7321,6 +8935,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB45_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB45_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -7392,6 +9035,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB45_1: # 
=>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB45_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB45_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -7545,6 +9217,35 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB46_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB46_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; 
RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7645,6 +9346,35 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB46_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB46_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7885,6 +9615,35 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, 
a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB47_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7985,6 +9744,35 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB47_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: 
.LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -8225,6 +10013,35 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB48_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB48_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -8325,6 +10142,35 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; 
RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB48_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB48_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -8565,6 +10411,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB49_3 +; RV32I-ZALRSC-NEXT: 
# %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -8636,6 +10511,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB49_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -8789,6 +10693,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi 
sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB50_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB50_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -8860,6 +10793,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; 
RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB50_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB50_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -9013,6 +10975,35 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB51_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB51_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; 
RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9113,6 +11104,35 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB51_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB51_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9353,6 +11373,35 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: 
.LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB52_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB52_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9453,6 +11502,35 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB52_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB52_1 +; RV64I-ZALRSC-NEXT: # 
%bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9693,6 +11771,35 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB53_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB53_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9793,6 +11900,35 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; 
RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB53_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB53_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10033,6 +12169,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB54_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, 
a5, a6 +; RV32I-ZALRSC-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB54_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10104,6 +12269,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB54_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB54_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10255,6 +12449,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, 
a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB55_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB55_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10319,6 +12537,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB55_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB55_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; 
RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10455,6 +12697,30 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB56_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB56_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10543,6 +12809,30 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB56_3 +; 
RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB56_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10751,6 +13041,30 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB57_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB57_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10839,6 +13153,30 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; 
RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB57_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB57_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11047,6 +13385,30 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB58_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB58_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; 
RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11135,6 +13497,30 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB58_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB58_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11343,6 +13729,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, 
a1, .LBB59_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB59_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11407,6 +13817,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB59_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB59_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11543,6 +13977,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB60_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB60_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11607,6 +14065,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB60_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB60_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: 
ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11743,6 +14225,30 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB61_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB61_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11831,6 +14337,30 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB61_3 +; RV64I-ZALRSC-NEXT: 
# %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB61_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12039,6 +14569,30 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB62_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB62_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12127,6 +14681,30 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: 
slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB62_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB62_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12335,6 +14913,30 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB63_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB63_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; 
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12423,6 +15025,30 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB63_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB63_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12631,6 +15257,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB64_3 +; 
RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB64_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12695,6 +15345,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB64_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB64_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12801,6 +15475,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; 
RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB65_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12832,6 +15527,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB65_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12929,6 +15645,27 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire: +; RV32I-ZALRSC: # 
%bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB66_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12981,6 +15718,27 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB66_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13141,6 +15899,27 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; 
RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB67_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13193,6 +15972,27 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB67_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13353,6 +16153,27 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 
%b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB68_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13405,6 +16226,27 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB68_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ 
-13565,6 +16407,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB69_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -13596,6 +16459,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB69_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst: ; 
RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -13697,6 +16581,23 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB70_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -13720,6 +16621,23 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB70_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -13791,6 +16709,23 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_xchg_0_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB71_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -13826,6 +16761,23 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB71_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -13933,6 +16885,23 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, 
a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB72_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -13968,6 +16937,23 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB72_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14075,6 +17061,23 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; 
RV32I-ZALRSC-NEXT: bnez a4, .LBB73_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14110,6 +17113,23 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB73_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14217,6 +17237,23 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB74_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: 
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14252,6 +17289,23 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB74_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14360,6 +17414,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB75_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -14383,6 +17453,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: +; 
RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB75_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -14456,6 +17542,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB76_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14490,6 +17592,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB76_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14596,6 +17714,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB77_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14630,6 +17764,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB77_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; 
RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14736,6 +17886,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB78_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14770,6 +17936,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB78_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14876,6 +18058,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 
; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB79_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14910,6 +18108,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB79_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -15014,6 +18228,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; 
RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB80_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15045,6 +18280,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB80_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15142,6 +18398,27 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: 
lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB81_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15194,6 +18471,27 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB81_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15354,6 +18652,27 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: 
andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB82_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15406,6 +18725,27 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB82_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15566,6 +18906,27 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_add_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB83_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15618,6 +18979,27 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB83_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -15778,6 +19160,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) 
nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB84_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15809,6 +19212,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB84_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15906,6 +19330,27 
@@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB85_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -15937,6 +19382,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB85_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; 
RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16038,6 +19504,27 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB86_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16090,6 +19577,27 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB86_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; 
RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16254,6 +19762,27 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB87_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16306,6 +19835,27 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB87_1 +; RV64I-ZALRSC-NEXT: # 
%bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16470,6 +20020,27 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB88_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16522,6 +20093,27 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl 
a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB88_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16686,6 +20278,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB89_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16717,6 +20330,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, 
a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB89_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16818,6 +20452,26 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB90_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16843,6 +20497,26 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and 
a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB90_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -16922,6 +20596,26 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB91_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -16962,6 +20656,26 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; 
RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB91_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17086,6 +20800,26 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB92_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17126,6 +20860,26 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; 
RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB92_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17250,6 +21004,26 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB93_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17290,6 +21064,26 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; 
RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB93_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17414,6 +21208,26 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB94_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17454,6 +21268,26 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; 
RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB94_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17578,6 +21412,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB95_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -17610,6 +21466,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; 
RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB95_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -17838,6 +21716,28 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB96_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -17892,6 +21792,28 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; 
RV64I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB96_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18186,6 +22108,28 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB97_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18240,6 +22184,28 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; 
RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB97_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18534,6 +22500,28 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB98_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18588,6 +22576,28 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: 
atomicrmw_nand_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB98_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -18882,6 +22892,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB99_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -18914,6 +22946,28 @@ 
define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB99_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19146,6 +23200,22 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB100_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19167,6 +23237,22 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, 
sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB100_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19234,6 +23320,22 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB101_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19266,6 +23368,22 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; 
RV64I-ZALRSC-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB101_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19366,6 +23484,22 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB102_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19398,6 +23532,22 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB102_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; 
RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19498,6 +23648,22 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB103_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19530,6 +23696,22 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB103_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19630,6 +23812,22 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: 
ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB104_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19662,6 +23860,22 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB104_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19762,6 +23976,22 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; 
RV32I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB105_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19783,6 +24013,22 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB105_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -19850,6 +24096,22 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB106_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; 
RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19882,6 +24144,22 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB106_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -19982,6 +24260,22 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB107_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20014,6 +24308,22 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; 
RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB107_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20114,6 +24424,22 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB108_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20146,6 +24472,22 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 
+; RV64I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB108_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20246,6 +24588,22 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB109_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20278,6 +24636,22 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB109_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; 
RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20410,6 +24784,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB110_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB110_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -20483,6 +24888,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: 
slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB110_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB110_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -20642,6 +25078,37 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: 
bge a7, a1, .LBB111_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB111_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20746,6 +25213,37 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB111_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB111_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; 
RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -20998,6 +25496,37 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB112_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB112_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21102,6 +25631,37 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: 
srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB112_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB112_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21354,6 +25914,37 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB113_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; 
RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB113_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21458,6 +26049,37 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB113_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB113_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21710,6 +26332,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) 
nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB114_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB114_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -21783,6 +26436,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 
+; RV64I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB114_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB114_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -21942,6 +26626,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB115_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 +; 
RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB115_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -22015,6 +26730,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB115_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB115_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -22174,6 +26920,37 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, 
a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB116_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB116_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22278,6 +27055,37 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; 
RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB116_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB116_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22530,6 +27338,37 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB117_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB117_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, 
a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22634,6 +27473,37 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB117_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB117_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22886,6 +27756,37 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; 
RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB118_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB118_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22990,6 +27891,37 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge 
a1, a7, .LBB118_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB118_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -23242,6 +28174,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB119_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB119_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; 
RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23315,6 +28278,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB119_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB119_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23476,6 +28470,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: 
.LBB120_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB120_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23545,6 +28564,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB120_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23688,6 
+28732,31 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB121_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -23782,6 +28851,31 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB121_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24000,6 +29094,31 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB122_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB122_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24094,6 +29213,31 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; 
RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB122_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB122_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24312,6 +29456,31 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: 
bnez a5, .LBB123_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24406,6 +29575,31 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB123_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24624,6 +29818,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: 
.LBB124_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB124_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -24693,6 +29912,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB124_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -24836,6 
+30080,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB125_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -24905,6 +30174,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 +; 
RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB125_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -25048,6 +30342,31 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB126_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25142,6 +30461,31 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: 
slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB126_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25360,6 +30704,31 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB127_1 +; 
RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25454,6 +30823,31 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB127_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25672,6 +31066,31 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB128_1: # =>This Inner 
Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB128_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25766,6 +31185,31 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB128_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25984,6 
+31428,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB129_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB129_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -26053,6 +31522,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB129_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 +; 
RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB129_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -26162,6 +31656,17 @@ define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB130_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoswap.w a0, a1, (a0) @@ -26177,6 +31682,17 @@ define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB130_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xchg_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.w a0, a1, (a0) @@ -26196,6 +31712,17 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB131_1: # 
=>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB131_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.aq a0, a1, (a0) @@ -26216,6 +31743,17 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB131_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.aq a0, a1, (a0) @@ -26240,6 +31778,17 @@ define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB132_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.rl a0, a1, (a0) @@ -26260,6 +31809,17 @@ define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: 
lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB132_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.rl a0, a1, (a0) @@ -26284,6 +31844,17 @@ define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB133_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26304,6 +31875,17 @@ define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB133_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26328,6 +31910,17 @@ define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, 
a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB134_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26348,6 +31941,17 @@ define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB134_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26372,6 +31976,17 @@ define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB135_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB135_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoadd.w a0, a1, (a0) @@ -26387,6 +32002,17 @@ define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB135_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; 
RV64I-ZALRSC-NEXT: bnez a3, .LBB135_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_add_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.w a0, a1, (a0) @@ -26406,6 +32032,17 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB136_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.aq a0, a1, (a0) @@ -26426,6 +32063,17 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB136_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.aq a0, a1, (a0) @@ -26450,6 +32098,17 @@ define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB137_1 +; RV32I-ZALRSC-NEXT: # %bb.2: 
+; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.rl a0, a1, (a0) @@ -26470,6 +32129,17 @@ define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB137_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.rl a0, a1, (a0) @@ -26494,6 +32164,17 @@ define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB138_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26514,6 +32195,17 @@ define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB138_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; 
RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26538,6 +32230,17 @@ define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB139_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26558,6 +32261,17 @@ define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB139_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26582,6 +32296,17 @@ define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB140_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; 
RV32IA-LABEL: atomicrmw_sub_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: neg a1, a1 @@ -26598,6 +32323,17 @@ define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB140_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_sub_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: neg a1, a1 @@ -26618,6 +32354,17 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB141_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26640,6 +32387,17 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB141_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26666,6 
+32424,17 @@ define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB142_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26688,6 +32457,17 @@ define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB142_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26714,6 +32494,17 @@ define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB143_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB143_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26736,6 +32527,17 @@ define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: 
addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB143_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB143_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26762,6 +32564,17 @@ define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB144_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26784,6 +32597,17 @@ define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB144_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26810,6 +32634,17 @@ define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_and_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB145_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoand.w a0, a1, (a0) @@ -26825,6 +32660,17 @@ define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB145_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_and_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.w a0, a1, (a0) @@ -26844,6 +32690,17 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB146_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.aq a0, a1, (a0) @@ -26864,6 +32721,17 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: 
.LBB146_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB146_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.aq a0, a1, (a0) @@ -26888,6 +32756,17 @@ define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB147_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.rl a0, a1, (a0) @@ -26908,6 +32787,17 @@ define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB147_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.rl a0, a1, (a0) @@ -26932,6 +32822,17 @@ define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 +; 
RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB148_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -26952,6 +32853,17 @@ define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB148_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -26976,6 +32888,17 @@ define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB149_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -26996,6 +32919,17 @@ define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) 
+; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB149_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -27020,6 +32954,18 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB150_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 @@ -27042,6 +32988,18 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB150_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 @@ -27200,6 +33158,18 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; 
RV32I-ZALRSC-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB151_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 @@ -27234,6 +33204,18 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB151_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 @@ -27432,6 +33414,18 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB152_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 @@ -27466,6 +33460,18 @@ 
define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB152_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 @@ -27664,6 +33670,18 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB153_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 @@ -27698,6 +33716,18 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB153_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; 
RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 @@ -27896,6 +33926,18 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB154_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 @@ -27918,6 +33960,18 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB154_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 @@ -28112,6 +34166,17 @@ define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB155_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; 
RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB155_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoor.w a0, a1, (a0) @@ -28127,6 +34192,17 @@ define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB155_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB155_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_or_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.w a0, a1, (a0) @@ -28146,6 +34222,17 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB156_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.aq a0, a1, (a0) @@ -28166,6 +34253,17 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB156_1 +; 
RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.aq a0, a1, (a0) @@ -28190,6 +34288,17 @@ define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB157_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.rl a0, a1, (a0) @@ -28210,6 +34319,17 @@ define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB157_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.rl a0, a1, (a0) @@ -28234,6 +34354,17 @@ define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB158_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB158_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; 
RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28254,6 +34385,17 @@ define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB158_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB158_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28278,6 +34420,17 @@ define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB159_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB159_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28298,6 +34451,17 @@ define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB159_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB159_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: 
atomicrmw_or_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28322,6 +34486,17 @@ define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB160_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB160_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoxor.w a0, a1, (a0) @@ -28337,6 +34512,17 @@ define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB160_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB160_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xor_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.w a0, a1, (a0) @@ -28356,6 +34542,17 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB161_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB161_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: 
amoxor.w.aq a0, a1, (a0) @@ -28376,6 +34573,17 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB161_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB161_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.aq a0, a1, (a0) @@ -28400,6 +34608,17 @@ define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB162_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB162_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.rl a0, a1, (a0) @@ -28420,6 +34639,17 @@ define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB162_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB162_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.rl a0, a1, (a0) @@ -28444,6 +34674,17 @@ 
define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB163_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB163_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28464,6 +34705,17 @@ define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB163_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB163_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28488,6 +34740,17 @@ define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB164_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB164_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28508,6 +34771,17 @@ define i32 
@atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB164_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB164_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28558,6 +34832,21 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB165_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB165_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB165_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB165_3: # in Loop: Header=BB165_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB165_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amomax.w a0, a1, (a0) @@ -28602,6 +34891,22 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB165_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB165_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB165_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; 
RV64I-ZALRSC-NEXT: .LBB165_3: # in Loop: Header=BB165_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB165_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_max_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.w a0, a1, (a0) @@ -28647,6 +34952,21 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB166_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB166_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB166_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB166_3: # in Loop: Header=BB166_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB166_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.aq a0, a1, (a0) @@ -28696,6 +35016,22 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB166_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB166_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB166_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB166_3: # in Loop: Header=BB166_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB166_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: 
atomicrmw_max_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.aq a0, a1, (a0) @@ -28746,6 +35082,21 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB167_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB167_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB167_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB167_3: # in Loop: Header=BB167_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB167_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.rl a0, a1, (a0) @@ -28795,6 +35146,22 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB167_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB167_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB167_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB167_3: # in Loop: Header=BB167_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB167_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.rl a0, a1, (a0) @@ -28845,6 +35212,21 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_max_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB168_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB168_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB168_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB168_3: # in Loop: Header=BB168_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB168_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -28894,6 +35276,22 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB168_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB168_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB168_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB168_3: # in Loop: Header=BB168_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB168_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -28944,6 +35342,21 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB169_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB169_3 +; 
RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB169_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB169_3: # in Loop: Header=BB169_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB169_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -28993,6 +35406,22 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB169_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB169_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB169_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB169_3: # in Loop: Header=BB169_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB169_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -29043,6 +35472,21 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB170_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB170_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB170_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB170_3: # in Loop: Header=BB170_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB170_1 +; 
RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amomin.w a0, a1, (a0) @@ -29087,6 +35531,22 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB170_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB170_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB170_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB170_3: # in Loop: Header=BB170_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB170_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_min_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.w a0, a1, (a0) @@ -29132,6 +35592,21 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB171_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB171_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB171_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB171_3: # in Loop: Header=BB171_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB171_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.aq a0, a1, (a0) @@ -29181,6 +35656,22 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) 
nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB171_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB171_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB171_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB171_3: # in Loop: Header=BB171_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB171_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.aq a0, a1, (a0) @@ -29231,6 +35722,21 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB172_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB172_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB172_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB172_3: # in Loop: Header=BB172_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB172_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.rl a0, a1, (a0) @@ -29280,6 +35786,22 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB172_1: # =>This Inner Loop Header: Depth=1 +; 
RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB172_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB172_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB172_3: # in Loop: Header=BB172_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB172_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.rl a0, a1, (a0) @@ -29330,6 +35852,21 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB173_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB173_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB173_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB173_3: # in Loop: Header=BB173_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB173_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29379,6 +35916,22 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB173_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB173_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB173_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB173_3: # in Loop: 
Header=BB173_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB173_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29429,6 +35982,21 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB174_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB174_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB174_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB174_3: # in Loop: Header=BB174_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB174_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29478,6 +36046,22 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB174_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB174_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB174_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB174_3: # in Loop: Header=BB174_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB174_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_seq_cst: ; 
RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29528,6 +36112,21 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB175_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB175_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB175_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB175_3: # in Loop: Header=BB175_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB175_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amomaxu.w a0, a1, (a0) @@ -29572,6 +36171,22 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB175_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB175_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB175_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB175_3: # in Loop: Header=BB175_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB175_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umax_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.w a0, a1, (a0) @@ -29617,6 +36232,21 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_acquire: +; RV32I-ZALRSC: 
# %bb.0: +; RV32I-ZALRSC-NEXT: .LBB176_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB176_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB176_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB176_3: # in Loop: Header=BB176_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB176_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.aq a0, a1, (a0) @@ -29666,6 +36296,22 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB176_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB176_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB176_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB176_3: # in Loop: Header=BB176_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB176_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.aq a0, a1, (a0) @@ -29716,6 +36362,21 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB177_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB177_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB177_1 
Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB177_3: # in Loop: Header=BB177_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB177_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.rl a0, a1, (a0) @@ -29765,6 +36426,22 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB177_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB177_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB177_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB177_3: # in Loop: Header=BB177_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB177_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.rl a0, a1, (a0) @@ -29815,6 +36492,21 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB178_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB178_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB178_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB178_3: # in Loop: Header=BB178_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB178_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv 
a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -29864,6 +36556,22 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB178_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB178_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB178_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB178_3: # in Loop: Header=BB178_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB178_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -29914,6 +36622,21 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB179_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB179_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB179_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB179_3: # in Loop: Header=BB179_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB179_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -29963,6 +36686,22 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) 
nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB179_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB179_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB179_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB179_3: # in Loop: Header=BB179_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB179_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -30013,6 +36752,21 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB180_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB180_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB180_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB180_3: # in Loop: Header=BB180_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB180_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amominu.w a0, a1, (a0) @@ -30057,6 +36811,22 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB180_1: # =>This Inner Loop Header: Depth=1 +; 
RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB180_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB180_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB180_3: # in Loop: Header=BB180_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB180_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umin_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.w a0, a1, (a0) @@ -30102,6 +36872,21 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB181_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB181_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB181_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB181_3: # in Loop: Header=BB181_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB181_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.aq a0, a1, (a0) @@ -30151,6 +36936,22 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB181_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB181_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB181_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB181_3: # in Loop: 
Header=BB181_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB181_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.aq a0, a1, (a0) @@ -30201,6 +37002,21 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB182_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB182_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB182_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB182_3: # in Loop: Header=BB182_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB182_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.rl a0, a1, (a0) @@ -30250,6 +37066,22 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB182_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB182_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB182_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB182_3: # in Loop: Header=BB182_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB182_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_release: ; 
RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.rl a0, a1, (a0) @@ -30300,6 +37132,21 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB183_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB183_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB183_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB183_3: # in Loop: Header=BB183_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB183_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30349,6 +37196,22 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB183_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB183_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB183_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB183_3: # in Loop: Header=BB183_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB183_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30399,6 +37262,21 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_umin_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB184_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB184_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB184_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB184_3: # in Loop: Header=BB184_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB184_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30448,6 +37326,22 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB184_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB184_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB184_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB184_3: # in Loop: Header=BB184_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB184_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30472,6 +37366,16 @@ define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; 
RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30492,6 +37396,17 @@ define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB185_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB185_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xchg_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.d a0, a1, (a0) @@ -30511,6 +37426,16 @@ define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30531,6 +37456,17 @@ define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB186_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB186_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; 
; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.aq a0, a1, (a0) @@ -30555,6 +37491,16 @@ define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30575,6 +37521,17 @@ define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB187_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB187_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.rl a0, a1, (a0) @@ -30599,6 +37556,16 @@ define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ 
-30619,6 +37586,17 @@ define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB188_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB188_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.aqrl a0, a1, (a0) @@ -30643,6 +37621,16 @@ define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30663,6 +37651,17 @@ define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB189_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB189_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.aqrl a0, a1, (a0) @@ -30687,6 +37686,16 @@ define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) 
nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30707,6 +37716,17 @@ define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB190_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB190_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_add_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.d a0, a1, (a0) @@ -30726,6 +37746,16 @@ define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30746,6 +37776,17 @@ define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_acquire: +; RV64I-ZALRSC: # 
%bb.0: +; RV64I-ZALRSC-NEXT: .LBB191_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB191_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.aq a0, a1, (a0) @@ -30770,6 +37811,16 @@ define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30790,6 +37841,17 @@ define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB192_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB192_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.rl a0, a1, (a0) @@ -30814,6 +37876,16 @@ define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte 
Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30834,6 +37906,17 @@ define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB193_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB193_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.aqrl a0, a1, (a0) @@ -30858,6 +37941,16 @@ define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30878,6 +37971,17 @@ define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB194_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; 
RV64I-ZALRSC-NEXT: bnez a3, .LBB194_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.aqrl a0, a1, (a0) @@ -30902,6 +38006,16 @@ define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30922,6 +38036,17 @@ define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB195_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB195_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_sub_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: neg a1, a1 @@ -30942,6 +38067,16 @@ define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: 
ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30962,6 +38097,17 @@ define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB196_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB196_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -30988,6 +38134,16 @@ define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31008,6 +38164,17 @@ define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB197_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB197_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -31034,6 
+38201,16 @@ define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31054,6 +38231,17 @@ define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB198_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB198_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -31080,6 +38268,16 @@ define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31100,6 +38298,17 @@ define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; 
RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB199_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB199_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -31126,6 +38335,16 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31146,6 +38365,17 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB200_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB200_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_and_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.d a0, a1, (a0) @@ -31165,6 +38395,16 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, 
sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31185,6 +38425,17 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB201_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB201_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.aq a0, a1, (a0) @@ -31209,6 +38460,16 @@ define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31229,6 +38490,17 @@ define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB202_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; 
RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB202_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.rl a0, a1, (a0) @@ -31253,6 +38525,16 @@ define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31273,6 +38555,17 @@ define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB203_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB203_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.aqrl a0, a1, (a0) @@ -31297,6 +38590,16 @@ define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded 
Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31317,6 +38620,17 @@ define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB204_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB204_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.aqrl a0, a1, (a0) @@ -31341,6 +38655,16 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31361,6 +38685,18 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB205_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB205_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; 
RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i64_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB205_1: # =>This Inner Loop Header: Depth=1 @@ -31453,6 +38789,16 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31473,6 +38819,18 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB206_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1 @@ -31591,6 +38949,16 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: 
addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31611,6 +38979,18 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB207_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1 @@ -31729,6 +39109,16 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31749,6 +39139,18 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB208_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; 
RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1 @@ -31867,6 +39269,16 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31887,6 +39299,18 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB209_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB209_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i64_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB209_1: # =>This Inner Loop Header: Depth=1 @@ -31997,6 +39421,16 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte 
Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32017,6 +39451,17 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB210_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB210_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_or_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.d a0, a1, (a0) @@ -32036,6 +39481,16 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32056,6 +39511,17 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB211_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB211_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i64_acquire: ; 
RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.d.aq a0, a1, (a0) @@ -32080,6 +39546,16 @@ define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32100,6 +39576,17 @@ define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB212_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB212_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.d.rl a0, a1, (a0) @@ -32124,6 +39611,16 @@ define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32144,6 +39641,17 @@ define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 
%b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB213_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB213_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.d.aqrl a0, a1, (a0) @@ -32168,6 +39676,16 @@ define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32188,6 +39706,17 @@ define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB214_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB214_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.d.aqrl a0, a1, (a0) @@ -32212,6 +39741,16 @@ define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_xor_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32232,6 +39771,17 @@ define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB215_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB215_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xor_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.d a0, a1, (a0) @@ -32251,6 +39801,16 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32271,6 +39831,17 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB216_1: # =>This Inner Loop Header: Depth=1 +; 
RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB216_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.d.aq a0, a1, (a0) @@ -32295,6 +39866,16 @@ define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32315,6 +39896,17 @@ define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB217_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB217_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.d.rl a0, a1, (a0) @@ -32339,6 +39931,16 @@ define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call 
__atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32359,6 +39961,17 @@ define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB218_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB218_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.d.aqrl a0, a1, (a0) @@ -32383,6 +39996,16 @@ define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32403,6 +40026,17 @@ define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB219_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB219_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; 
RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.d.aqrl a0, a1, (a0) @@ -32471,6 +40105,60 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB220_2 +; RV32I-ZALRSC-NEXT: .LBB220_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB220_7 +; RV32I-ZALRSC-NEXT: .LBB220_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB220_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB220_5 +; RV32I-ZALRSC-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB220_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB220_1 +; 
RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB220_1 +; RV32I-ZALRSC-NEXT: .LBB220_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -32561,6 +40249,21 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB220_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB220_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB220_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB220_3: # in Loop: Header=BB220_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB220_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_max_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.d a0, a1, (a0) @@ -32624,6 +40327,60 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; 
RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB221_2 +; RV32I-ZALRSC-NEXT: .LBB221_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 2 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB221_7 +; RV32I-ZALRSC-NEXT: .LBB221_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB221_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB221_5 +; RV32I-ZALRSC-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB221_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB221_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB221_1 +; RV32I-ZALRSC-NEXT: .LBB221_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret 
+; ; RV32IA-LABEL: atomicrmw_max_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -32714,6 +40471,21 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB221_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB221_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB221_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB221_3: # in Loop: Header=BB221_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB221_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.d.aq a0, a1, (a0) @@ -32782,6 +40554,60 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB222_2 +; RV32I-ZALRSC-NEXT: .LBB222_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 3 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB222_7 +; RV32I-ZALRSC-NEXT: .LBB222_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB222_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB222_5 +; RV32I-ZALRSC-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB222_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB222_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB222_1 +; RV32I-ZALRSC-NEXT: .LBB222_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -32872,6 +40698,21 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB222_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB222_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB222_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: 
.LBB222_3: # in Loop: Header=BB222_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB222_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.d.rl a0, a1, (a0) @@ -32940,6 +40781,60 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB223_2 +; RV32I-ZALRSC-NEXT: .LBB223_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 4 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB223_7 +; RV32I-ZALRSC-NEXT: .LBB223_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB223_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB223_5 +; RV32I-ZALRSC-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB223_5: # %atomicrmw.start +; 
RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB223_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB223_1 +; RV32I-ZALRSC-NEXT: .LBB223_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33030,6 +40925,21 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB223_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB223_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB223_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB223_3: # in Loop: Header=BB223_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB223_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.d.aqrl a0, a1, (a0) @@ -33098,6 +41008,60 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; 
RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB224_2 +; RV32I-ZALRSC-NEXT: .LBB224_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 5 +; RV32I-ZALRSC-NEXT: li a5, 5 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB224_7 +; RV32I-ZALRSC-NEXT: .LBB224_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB224_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB224_5 +; RV32I-ZALRSC-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB224_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB224_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB224_1 +; RV32I-ZALRSC-NEXT: .LBB224_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; 
RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33188,6 +41152,21 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB224_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB224_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB224_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB224_3: # in Loop: Header=BB224_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB224_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.d.aqrl a0, a1, (a0) @@ -33256,6 +41235,60 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB225_2 +; RV32I-ZALRSC-NEXT: .LBB225_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 
12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB225_7 +; RV32I-ZALRSC-NEXT: .LBB225_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB225_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB225_5 +; RV32I-ZALRSC-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB225_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB225_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB225_1 +; RV32I-ZALRSC-NEXT: .LBB225_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33346,6 +41379,21 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB225_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; 
RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB225_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB225_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB225_3: # in Loop: Header=BB225_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB225_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_min_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.d a0, a1, (a0) @@ -33409,6 +41457,60 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB226_2 +; RV32I-ZALRSC-NEXT: .LBB226_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 2 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB226_7 +; RV32I-ZALRSC-NEXT: .LBB226_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB226_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j 
.LBB226_5 +; RV32I-ZALRSC-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB226_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB226_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB226_1 +; RV32I-ZALRSC-NEXT: .LBB226_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33499,6 +41601,21 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB226_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB226_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB226_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB226_3: # in Loop: Header=BB226_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB226_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.d.aq a0, a1, (a0) @@ -33567,6 +41684,60 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: 
addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB227_2 +; RV32I-ZALRSC-NEXT: .LBB227_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 3 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB227_7 +; RV32I-ZALRSC-NEXT: .LBB227_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB227_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB227_5 +; RV32I-ZALRSC-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB227_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB227_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB227_1 +; RV32I-ZALRSC-NEXT: .LBB227_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; 
RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33657,6 +41828,21 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB227_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB227_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB227_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB227_3: # in Loop: Header=BB227_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB227_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.d.rl a0, a1, (a0) @@ -33725,6 +41911,60 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB228_2 +; RV32I-ZALRSC-NEXT: .LBB228_1: # 
%atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 4 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB228_7 +; RV32I-ZALRSC-NEXT: .LBB228_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB228_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB228_5 +; RV32I-ZALRSC-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB228_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB228_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB228_1 +; RV32I-ZALRSC-NEXT: .LBB228_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33815,6 +42055,21 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_acq_rel: +; 
RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB228_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB228_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB228_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB228_3: # in Loop: Header=BB228_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB228_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.d.aqrl a0, a1, (a0) @@ -33883,6 +42138,60 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB229_2 +; RV32I-ZALRSC-NEXT: .LBB229_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 5 +; RV32I-ZALRSC-NEXT: li a5, 5 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB229_7 +; RV32I-ZALRSC-NEXT: .LBB229_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB229_4 +; 
RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB229_5 +; RV32I-ZALRSC-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB229_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB229_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB229_1 +; RV32I-ZALRSC-NEXT: .LBB229_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33973,6 +42282,21 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB229_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB229_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB229_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB229_3: # in Loop: Header=BB229_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB229_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i64_seq_cst: ; 
RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.d.aqrl a0, a1, (a0) @@ -34041,6 +42365,60 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB230_2 +; RV32I-ZALRSC-NEXT: .LBB230_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB230_7 +; RV32I-ZALRSC-NEXT: .LBB230_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB230_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB230_5 +; RV32I-ZALRSC-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB230_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB230_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1 +; 
RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB230_1 +; RV32I-ZALRSC-NEXT: .LBB230_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34131,6 +42509,21 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB230_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB230_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB230_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB230_3: # in Loop: Header=BB230_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB230_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umax_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.d a0, a1, (a0) @@ -34194,6 +42587,60 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv 
s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB231_2 +; RV32I-ZALRSC-NEXT: .LBB231_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 2 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB231_7 +; RV32I-ZALRSC-NEXT: .LBB231_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB231_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB231_5 +; RV32I-ZALRSC-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB231_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB231_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB231_1 +; RV32I-ZALRSC-NEXT: .LBB231_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34284,6 
+42731,21 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB231_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB231_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB231_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB231_3: # in Loop: Header=BB231_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB231_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.aq a0, a1, (a0) @@ -34352,6 +42814,60 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB232_2 +; RV32I-ZALRSC-NEXT: .LBB232_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 3 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, 
.LBB232_7 +; RV32I-ZALRSC-NEXT: .LBB232_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB232_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB232_5 +; RV32I-ZALRSC-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB232_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB232_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB232_1 +; RV32I-ZALRSC-NEXT: .LBB232_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34442,6 +42958,21 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB232_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB232_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB232_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB232_3: # in Loop: Header=BB232_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; 
RV64I-ZALRSC-NEXT: bnez a3, .LBB232_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.rl a0, a1, (a0) @@ -34510,6 +43041,60 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB233_2 +; RV32I-ZALRSC-NEXT: .LBB233_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 4 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB233_7 +; RV32I-ZALRSC-NEXT: .LBB233_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB233_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB233_5 +; RV32I-ZALRSC-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB233_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; 
RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB233_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB233_1 +; RV32I-ZALRSC-NEXT: .LBB233_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34600,6 +43185,21 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB233_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB233_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB233_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB233_3: # in Loop: Header=BB233_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB233_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.aqrl a0, a1, (a0) @@ -34668,6 +43268,60 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 
4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB234_2 +; RV32I-ZALRSC-NEXT: .LBB234_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 5 +; RV32I-ZALRSC-NEXT: li a5, 5 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB234_7 +; RV32I-ZALRSC-NEXT: .LBB234_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB234_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB234_5 +; RV32I-ZALRSC-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB234_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB234_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB234_1 +; RV32I-ZALRSC-NEXT: .LBB234_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 
4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34758,6 +43412,21 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB234_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB234_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB234_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB234_3: # in Loop: Header=BB234_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB234_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.aqrl a0, a1, (a0) @@ -34826,6 +43495,60 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB235_2 +; RV32I-ZALRSC-NEXT: .LBB235_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; 
RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB235_7 +; RV32I-ZALRSC-NEXT: .LBB235_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB235_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB235_5 +; RV32I-ZALRSC-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB235_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB235_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB235_1 +; RV32I-ZALRSC-NEXT: .LBB235_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34916,6 +43639,21 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB235_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB235_3 
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB235_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB235_3: # in Loop: Header=BB235_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB235_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umin_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.d a0, a1, (a0) @@ -34979,6 +43717,60 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB236_2 +; RV32I-ZALRSC-NEXT: .LBB236_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 2 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB236_7 +; RV32I-ZALRSC-NEXT: .LBB236_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB236_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB236_5 +; RV32I-ZALRSC-NEXT: .LBB236_4: # in Loop: Header=BB236_2 
Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB236_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB236_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB236_1 +; RV32I-ZALRSC-NEXT: .LBB236_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35069,6 +43861,21 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB236_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB236_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB236_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB236_3: # in Loop: Header=BB236_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB236_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.aq a0, a1, (a0) @@ -35137,6 +43944,60 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_umin_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB237_2 +; RV32I-ZALRSC-NEXT: .LBB237_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 3 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB237_7 +; RV32I-ZALRSC-NEXT: .LBB237_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB237_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB237_5 +; RV32I-ZALRSC-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB237_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB237_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB237_1 +; RV32I-ZALRSC-NEXT: .LBB237_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 
28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35227,6 +44088,21 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB237_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB237_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB237_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB237_3: # in Loop: Header=BB237_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB237_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.rl a0, a1, (a0) @@ -35295,6 +44171,60 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB238_2 +; RV32I-ZALRSC-NEXT: .LBB238_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: 
Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 4 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB238_7 +; RV32I-ZALRSC-NEXT: .LBB238_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB238_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB238_5 +; RV32I-ZALRSC-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB238_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB238_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB238_1 +; RV32I-ZALRSC-NEXT: .LBB238_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35385,6 +44315,21 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: 
.LBB238_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB238_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB238_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB238_3: # in Loop: Header=BB238_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB238_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.aqrl a0, a1, (a0) @@ -35453,6 +44398,60 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB239_2 +; RV32I-ZALRSC-NEXT: .LBB239_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 5 +; RV32I-ZALRSC-NEXT: li a5, 5 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB239_7 +; RV32I-ZALRSC-NEXT: .LBB239_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB239_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; 
RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB239_5 +; RV32I-ZALRSC-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB239_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB239_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB239_1 +; RV32I-ZALRSC-NEXT: .LBB239_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35543,6 +44542,21 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB239_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB239_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB239_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB239_3: # in Loop: Header=BB239_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB239_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: 
amominu.d.aqrl a0, a1, (a0) diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index 7d29ac9..7fe5fa7 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -5,12 +5,16 @@ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV32I-LABEL: atomic_load_i8_unordered: @@ -30,6 +34,11 @@ define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV32IA-NEXT: lb a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomic_load_i8_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -46,6 +55,11 @@ define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret %1 = load atomic i8, ptr %a unordered, align 1 ret i8 %1 } @@ -68,6 +82,11 @@ define signext i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; RV32IA-NEXT: lh a0, 0(a0) ; 
RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomic_load_i16_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -84,6 +103,11 @@ define signext i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret %1 = load atomic i16, ptr %a unordered, align 2 ret i16 %1 } @@ -104,6 +128,11 @@ define signext i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomic_load_i32_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -119,6 +148,11 @@ define signext i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret %1 = load atomic i32, ptr %a unordered, align 4 ret i32 %1 } @@ -159,6 +193,28 @@ define signext i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor 
a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -192,6 +248,28 @@ define signext i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i8 %b monotonic ret i8 %1 } @@ -231,6 +309,28 @@ define signext i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, 
a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -264,6 +364,28 @@ define signext i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i8 %b monotonic ret i8 %1 } @@ -303,6 +425,28 @@ define signext i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; 
RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB5_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -336,6 +480,28 @@ define signext i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB5_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i8 %b monotonic ret i8 %1 } @@ -369,6 +535,27 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; 
RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -396,6 +583,27 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i8 %b monotonic ret i8 %1 } @@ -436,6 +644,29 @@ define signext i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop 
Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB7_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -470,6 +701,29 @@ define signext i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB7_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i8 %b monotonic ret i8 %1 } @@ -499,6 +753,23 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: 
sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -522,6 +793,23 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i8 %b monotonic ret i8 %1 } @@ -551,6 +839,23 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -574,6 +879,23 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i8 %b monotonic ret i8 %1 } @@ -653,6 +975,37 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB10_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; 
RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB10_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -726,6 +1079,37 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB10_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB10_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i8 %b monotonic ret i8 %1 } @@ -805,6 +1189,37 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind 
{ ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB11_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB11_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -878,6 +1293,37 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB11_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB11_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i8 %b monotonic ret i8 %1 } @@ -950,6 +1396,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB12_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB12_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: 
addi sp, sp, -48 @@ -1016,6 +1488,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB12_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB12_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i8 %b monotonic ret i8 %1 } @@ -1088,6 +1586,32 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB13_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; 
RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB13_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1154,6 +1678,32 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB13_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB13_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i8 %b monotonic ret i8 %1 } @@ -1194,6 +1744,29 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; 
RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB14_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1228,6 +1801,29 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB14_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i16 %b monotonic ret i16 %1 } @@ -1268,6 +1864,29 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr 
%a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1302,6 +1921,29 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, 
a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i16 %b monotonic ret i16 %1 } @@ -1342,6 +1984,29 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB16_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1376,6 +2041,29 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; 
RV64I-ZALRSC-NEXT: bnez a5, .LBB16_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i16 %b monotonic ret i16 %1 } @@ -1410,6 +2098,28 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB17_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1438,6 +2148,28 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) 
+; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB17_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i16 %b monotonic ret i16 %1 } @@ -1479,6 +2211,30 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1514,6 +2270,30 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; 
RV64I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i16 %b monotonic ret i16 %1 } @@ -1544,6 +2324,24 @@ define signext i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB19_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1568,6 +2366,24 @@ define signext i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: 
Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB19_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i16 %b monotonic ret i16 %1 } @@ -1598,6 +2414,24 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB20_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1622,6 +2456,24 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB20_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; 
RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i16 %b monotonic ret i16 %1 } @@ -1703,6 +2555,39 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB21_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB21_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1778,6 +2663,39 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB21_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB21_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i16 %b monotonic ret i16 %1 } @@ -1859,6 +2777,39 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: 
mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB22_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB22_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1934,6 +2885,39 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB22_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB22_1 +; 
RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i16 %b monotonic ret i16 %1 } @@ -2011,6 +2995,33 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB23_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2082,6 +3093,33 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB23_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i16 %b monotonic ret i16 %1 } @@ -2159,6 +3197,33 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret 
+; ; RV64I-LABEL: atomicrmw_umin_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2230,6 +3295,33 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i16 %b monotonic ret i16 %1 } @@ -2250,6 +3342,17 @@ define signext i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoswap.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB25_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i32_monotonic: ; RV64I: # %bb.0: ; 
RV64I-NEXT: addi sp, sp, -16 @@ -2265,6 +3368,17 @@ define signext i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB25_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i32 %b monotonic ret i32 %1 } @@ -2285,6 +3399,17 @@ define signext i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoadd.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB26_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2300,6 +3425,17 @@ define signext i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB26_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i32 %b monotonic ret i32 %1 } @@ -2321,6 +3457,17 @@ define signext i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) 
nounwind { ; RV32IA-NEXT: amoadd.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB27_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2337,6 +3484,17 @@ define signext i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA-NEXT: neg a1, a1 ; RV64IA-NEXT: amoadd.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB27_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i32 %b monotonic ret i32 %1 } @@ -2357,6 +3515,17 @@ define signext i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoand.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB28_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2372,6 +3541,17 @@ define signext i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; 
RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB28_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i32 %b monotonic ret i32 %1 } @@ -2413,6 +3593,18 @@ define signext i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB29_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2449,6 +3641,18 @@ define signext i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA-ZACAS-NEXT: bne a0, a3, .LBB29_1 ; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB29_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i32 %b monotonic ret i32 %1 } @@ -2469,6 +3673,17 @@ define signext i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoor.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; 
RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB30_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2484,6 +3699,17 @@ define signext i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB30_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i32 %b monotonic ret i32 %1 } @@ -2504,6 +3730,17 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoxor.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB31_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2519,6 +3756,17 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; 
RV64I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB31_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i32 %b monotonic ret i32 %1 } @@ -2565,6 +3813,21 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amomax.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB32_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB32_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2608,6 +3871,22 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB32_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB32_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: 
ret %1 = atomicrmw max ptr %a, i32 %b monotonic ret i32 %1 } @@ -2654,6 +3933,21 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amomin.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB33_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB33_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2697,6 +3991,22 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB33_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB33_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i32 %b monotonic ret i32 %1 } @@ -2743,6 +4053,21 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amomaxu.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; 
RV32I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB34_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB34_3: # in Loop: Header=BB34_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB34_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2786,6 +4111,22 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB34_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB34_3: # in Loop: Header=BB34_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB34_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i32 %b monotonic ret i32 %1 } @@ -2832,6 +4173,21 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amominu.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB35_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: 
.LBB35_3: # in Loop: Header=BB35_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB35_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2875,6 +4231,22 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB35_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB35_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i32 %b monotonic ret i32 %1 } @@ -2900,6 +4272,16 @@ define signext i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2914,6 +4296,17 @@ define signext i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: 
atomicrmw_xchg_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB36_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i64 %b monotonic ret i64 %1 } @@ -2939,6 +4332,16 @@ define signext i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2953,6 +4356,17 @@ define signext i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB37_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i64 %b monotonic ret i64 %1 } @@ -2978,6 +4392,16 @@ define signext i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; 
RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2993,6 +4417,17 @@ define signext i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA-NEXT: neg a1, a1 ; RV64IA-NEXT: amoadd.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB38_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i64 %b monotonic ret i64 %1 } @@ -3018,6 +4453,16 @@ define signext i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3032,6 +4477,17 @@ define signext i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; 
RV64I-ZALRSC-NEXT: bnez a3, .LBB39_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i64 %b monotonic ret i64 %1 } @@ -3057,6 +4513,16 @@ define signext i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3092,6 +4558,18 @@ define signext i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA-ZACAS-NEXT: bne a0, a3, .LBB40_1 ; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB40_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i64 %b monotonic ret i64 %1 } @@ -3117,6 +4595,16 @@ define signext i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, 
sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3131,6 +4619,17 @@ define signext i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB41_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i64 %b monotonic ret i64 %1 } @@ -3156,6 +4655,16 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3170,6 +4679,17 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB42_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i64 %b monotonic ret i64 %1 } @@ -3283,6 
+4803,60 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB43_2 +; RV32I-ZALRSC-NEXT: .LBB43_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB43_7 +; RV32I-ZALRSC-NEXT: .LBB43_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB43_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB43_5 +; RV32I-ZALRSC-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB43_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB43_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j 
.LBB43_1 +; RV32I-ZALRSC-NEXT: .LBB43_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3323,6 +4897,21 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB43_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB43_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i64 %b monotonic ret i64 %1 } @@ -3436,6 +5025,60 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, 
a1 +; RV32I-ZALRSC-NEXT: j .LBB44_2 +; RV32I-ZALRSC-NEXT: .LBB44_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB44_7 +; RV32I-ZALRSC-NEXT: .LBB44_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB44_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB44_5 +; RV32I-ZALRSC-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB44_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB44_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB44_1 +; RV32I-ZALRSC-NEXT: .LBB44_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3476,6 +5119,21 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: 
amomin.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB44_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB44_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i64 %b monotonic ret i64 %1 } @@ -3589,6 +5247,60 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB45_2 +; RV32I-ZALRSC-NEXT: .LBB45_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB45_7 +; RV32I-ZALRSC-NEXT: .LBB45_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq 
a5, s0, .LBB45_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB45_5 +; RV32I-ZALRSC-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB45_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB45_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB45_1 +; RV32I-ZALRSC-NEXT: .LBB45_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3629,6 +5341,21 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB45_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB45_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw 
umax ptr %a, i64 %b monotonic ret i64 %1 } @@ -3742,6 +5469,60 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB46_2 +; RV32I-ZALRSC-NEXT: .LBB46_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB46_7 +; RV32I-ZALRSC-NEXT: .LBB46_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB46_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB46_5 +; RV32I-ZALRSC-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB46_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB46_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; 
RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB46_1 +; RV32I-ZALRSC-NEXT: .LBB46_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3782,6 +5563,21 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB46_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB46_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i64 %b monotonic ret i64 %1 } @@ -3827,6 +5623,32 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a4, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: zext.b a2, a2 +; RV32I-ZALRSC-NEXT: sll a4, a4, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 
+; RV32I-ZALRSC-NEXT: lr.w a5, (a3) +; RV32I-ZALRSC-NEXT: and a6, a5, a4 +; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB47_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a2 +; RV32I-ZALRSC-NEXT: and a6, a6, a4 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV32I-ZALRSC-NEXT: .LBB47_3: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic_val0: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3866,6 +5688,32 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a4, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: zext.b a2, a2 +; RV64I-ZALRSC-NEXT: sllw a4, a4, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a3) +; RV64I-ZALRSC-NEXT: and a6, a5, a4 +; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB47_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a2 +; RV64I-ZALRSC-NEXT: and a6, a6, a4 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV64I-ZALRSC-NEXT: .LBB47_3: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic %2 = extractvalue { i8, i1 } %1, 0 ret i8 %2 @@ -3911,6 +5759,32 @@ define i1 
@cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig ; RV32IA-NEXT: seqz a0, a1 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a4, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: zext.b a2, a2 +; RV32I-ZALRSC-NEXT: sll a4, a4, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a0, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a3) +; RV32I-ZALRSC-NEXT: and a5, a2, a4 +; RV32I-ZALRSC-NEXT: bne a5, a1, .LBB48_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a2, a0 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a2, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB48_1 +; RV32I-ZALRSC-NEXT: .LBB48_3: +; RV32I-ZALRSC-NEXT: and a2, a2, a4 +; RV32I-ZALRSC-NEXT: xor a1, a1, a2 +; RV32I-ZALRSC-NEXT: seqz a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic_val1: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3949,6 +5823,32 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig ; RV64IA-NEXT: xor a1, a1, a2 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a4, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: zext.b a2, a2 +; RV64I-ZALRSC-NEXT: sllw a4, a4, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a0, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a3) +; RV64I-ZALRSC-NEXT: and a5, a2, a4 +; RV64I-ZALRSC-NEXT: bne a5, a1, .LBB48_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: 
Header=BB48_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a2, a0 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a2, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB48_1 +; RV64I-ZALRSC-NEXT: .LBB48_3: +; RV64I-ZALRSC-NEXT: and a2, a2, a4 +; RV64I-ZALRSC-NEXT: xor a1, a1, a2 +; RV64I-ZALRSC-NEXT: seqz a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic %2 = extractvalue { i8, i1 } %1, 1 ret i1 %2 @@ -3996,6 +5896,33 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext % ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val0: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a4, 16 +; RV32I-ZALRSC-NEXT: addi a4, a4, -1 +; RV32I-ZALRSC-NEXT: sll a5, a4, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a4 +; RV32I-ZALRSC-NEXT: and a2, a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a5 +; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB49_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a2 +; RV32I-ZALRSC-NEXT: and a6, a6, a5 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV32I-ZALRSC-NEXT: .LBB49_3: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic_val0: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4036,6 +5963,33 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext % ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: 
cmpxchg_i16_monotonic_monotonic_val0: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a4, 16 +; RV64I-ZALRSC-NEXT: addi a4, a4, -1 +; RV64I-ZALRSC-NEXT: sllw a5, a4, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a4 +; RV64I-ZALRSC-NEXT: and a2, a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a5 +; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB49_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a2 +; RV64I-ZALRSC-NEXT: and a6, a6, a5 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV64I-ZALRSC-NEXT: .LBB49_3: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic %2 = extractvalue { i16, i1 } %1, 0 ret i16 %2 @@ -4082,6 +6036,33 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16 ; RV32IA-NEXT: seqz a0, a1 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a4, 16 +; RV32I-ZALRSC-NEXT: addi a4, a4, -1 +; RV32I-ZALRSC-NEXT: sll a5, a4, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a4 +; RV32I-ZALRSC-NEXT: and a2, a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a0, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a3) +; RV32I-ZALRSC-NEXT: and a4, a2, a5 +; RV32I-ZALRSC-NEXT: bne a4, a1, .LBB50_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a4, a2, a0 +; RV32I-ZALRSC-NEXT: and 
a4, a4, a5 +; RV32I-ZALRSC-NEXT: xor a4, a2, a4 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a3) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB50_1 +; RV32I-ZALRSC-NEXT: .LBB50_3: +; RV32I-ZALRSC-NEXT: and a2, a2, a5 +; RV32I-ZALRSC-NEXT: xor a1, a1, a2 +; RV32I-ZALRSC-NEXT: seqz a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic_val1: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4121,6 +6102,33 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16 ; RV64IA-NEXT: xor a1, a1, a2 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a4, 16 +; RV64I-ZALRSC-NEXT: addi a4, a4, -1 +; RV64I-ZALRSC-NEXT: sllw a5, a4, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a4 +; RV64I-ZALRSC-NEXT: and a2, a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a0, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a3) +; RV64I-ZALRSC-NEXT: and a4, a2, a5 +; RV64I-ZALRSC-NEXT: bne a4, a1, .LBB50_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a4, a2, a0 +; RV64I-ZALRSC-NEXT: and a4, a4, a5 +; RV64I-ZALRSC-NEXT: xor a4, a2, a4 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a3) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB50_1 +; RV64I-ZALRSC-NEXT: .LBB50_3: +; RV64I-ZALRSC-NEXT: and a2, a2, a5 +; RV64I-ZALRSC-NEXT: xor a1, a1, a2 +; RV64I-ZALRSC-NEXT: seqz a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic %2 = extractvalue { i16, i1 } %1, 1 ret i1 %2 @@ -4159,6 +6167,18 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext % ; RV32IA-ZACAS-NEXT: mv a0, a1 ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val0: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB51_1: # 
=>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: bne a3, a1, .LBB51_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB51_1 +; RV32I-ZALRSC-NEXT: .LBB51_3: +; RV32I-ZALRSC-NEXT: mv a0, a3 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val0: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4190,6 +6210,18 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext % ; RV64IA-ZACAS-NEXT: amocas.w a1, a2, (a0) ; RV64IA-ZACAS-NEXT: mv a0, a1 ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val0: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB51_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB51_1 +; RV64I-ZALRSC-NEXT: .LBB51_3: +; RV64I-ZALRSC-NEXT: mv a0, a3 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic %2 = extractvalue { i32, i1 } %1, 0 ret i32 %2 @@ -4230,6 +6262,19 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32 ; RV32IA-ZACAS-NEXT: seqz a0, a1 ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val1: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: bne a3, a1, .LBB52_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB52_1 +; RV32I-ZALRSC-NEXT: .LBB52_3: +; RV32I-ZALRSC-NEXT: xor a1, a3, a1 +; RV32I-ZALRSC-NEXT: seqz a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val1: ; RV64I: # %bb.0: ; RV64I-NEXT: addi 
sp, sp, -16 @@ -4263,6 +6308,19 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32 ; RV64IA-ZACAS-NEXT: xor a1, a3, a1 ; RV64IA-ZACAS-NEXT: seqz a0, a1 ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val1: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB52_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB52_1 +; RV64I-ZALRSC-NEXT: .LBB52_3: +; RV64I-ZALRSC-NEXT: xor a1, a3, a1 +; RV64I-ZALRSC-NEXT: seqz a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic %2 = extractvalue { i32, i1 } %1, 1 ret i1 %2 @@ -4304,6 +6362,27 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB53_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB53_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB53_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB53_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4339,6 +6418,28 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: li a2, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: 
ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB53_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB53_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB53_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB53_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4391,6 +6492,27 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB54_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB54_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: add a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB54_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB54_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: addi a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4426,6 +6548,28 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: addi a2, a0, 1 ; RV64IA-NEXT: sw a2, 
0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB54_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB54_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: add a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB54_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB54_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: addi a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4479,6 +6623,27 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB55_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB55_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: sub a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB55_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB55_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: addi a2, a0, -1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4514,6 +6679,28 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: addi a2, 
a0, -1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB55_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB55_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB55_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB55_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: addi a2, a1, -1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4567,6 +6754,27 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB56_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB56_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: and a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB56_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB56_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: andi a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4602,6 +6810,28 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) 
nounwind ; RV64IA-NEXT: andi a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB56_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB56_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: and a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB56_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB56_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4685,6 +6915,28 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-ZACAS-NEXT: mv a0, a1 ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB57_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB57_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: and a3, a0, a2 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB57_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB57_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: andi a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4750,6 +7002,28 @@ 
define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-ZACAS-NEXT: sw a2, 0(a0) ; RV64IA-ZACAS-NEXT: mv a0, a1 ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB57_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB57_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: and a3, a0, a2 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB57_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB57_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: andi a2, a0, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4803,6 +7077,27 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind { ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB58_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB58_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: or a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB58_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB58_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: ori a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi 
a1, a1, 1 @@ -4838,6 +7133,28 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind { ; RV64IA-NEXT: ori a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB58_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB58_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: or a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB58_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB58_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ori a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4891,6 +7208,27 @@ define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB59_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB59_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: xor a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB59_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB59_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: xori a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i32_monotonic_crossbb: ; RV64I: # 
%bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4926,6 +7264,28 @@ define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: xori a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB59_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB59_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB59_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB59_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: xori a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5007,6 +7367,37 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB60_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB60_5: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bge a3, a2, .LBB60_7 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB60_7: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB60_5 +; 
RV32I-ZALRSC-NEXT: # %bb.8: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB60_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: mv a2, a0 +; RV32I-ZALRSC-NEXT: bgtz a0, .LBB60_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %else +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB60_4: # %else +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5070,6 +7461,37 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: .LBB60_4: # %else ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB60_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB60_5: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: mv a3, a0 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB60_7 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB60_7: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB60_5 +; RV64I-ZALRSC-NEXT: # %bb.8: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB60_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: mv a2, a0 +; RV64I-ZALRSC-NEXT: bgtz a0, .LBB60_4 +; RV64I-ZALRSC-NEXT: # %bb.3: # %else +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB60_4: # %else +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5155,6 +7577,37 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) 
; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB61_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB61_5: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bge a2, a3, .LBB61_7 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB61_7: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB61_5 +; RV32I-ZALRSC-NEXT: # %bb.8: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB61_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: mv a2, a0 +; RV32I-ZALRSC-NEXT: blez a0, .LBB61_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %else +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB61_4: # %else +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5220,6 +7673,37 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: .LBB61_4: # %else ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB61_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB61_5: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: mv a3, a0 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB61_7 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; 
RV64I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB61_7: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB61_5 +; RV64I-ZALRSC-NEXT: # %bb.8: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB61_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: mv a2, a0 +; RV64I-ZALRSC-NEXT: blez a0, .LBB61_4 +; RV64I-ZALRSC-NEXT: # %bb.3: # %else +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB61_4: # %else +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5290,6 +7774,34 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB62_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB62_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bgeu a3, a2, .LBB62_5 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB62_5: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB62_3 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB62_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: seqz a2, a0 +; RV32I-ZALRSC-NEXT: add a2, a0, a2 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ 
-5347,6 +7859,35 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: add a2, a0, a2 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB62_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB62_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB62_5 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB62_5: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB62_3 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB62_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: seqz a2, a1 +; RV64I-ZALRSC-NEXT: add a2, a1, a2 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5434,6 +7975,38 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB63_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB63_5: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bgeu a2, a3, .LBB63_7 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; 
RV32I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB63_7: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB63_5 +; RV32I-ZALRSC-NEXT: # %bb.8: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB63_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: li a3, 1 +; RV32I-ZALRSC-NEXT: mv a2, a0 +; RV32I-ZALRSC-NEXT: bltu a0, a3, .LBB63_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %else +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB63_4: # %else +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5501,6 +8074,38 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: .LBB63_4: # %else ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB63_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB63_5: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: mv a3, a0 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB63_7 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB63_7: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB63_5 +; RV64I-ZALRSC-NEXT: # %bb.8: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB63_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: li a3, 1 +; RV64I-ZALRSC-NEXT: mv a2, a0 +; RV64I-ZALRSC-NEXT: bltu a0, a3, .LBB63_4 +; 
RV64I-ZALRSC-NEXT: # %bb.3: # %else +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB63_4: # %else +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5570,6 +8175,25 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3 ; RV32IA-ZACAS-NEXT: lw a0, 0(a0) ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: mv a4, a0 +; RV32I-ZALRSC-NEXT: beqz a3, .LBB64_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: .LBB64_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a0, (a4) +; RV32I-ZALRSC-NEXT: bne a0, a1, .LBB64_5 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB64_3 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a2, (a4) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB64_3 +; RV32I-ZALRSC-NEXT: .LBB64_5: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB64_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a4) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a3, .LBB64_2 @@ -5620,6 +8244,26 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3 ; RV64IA-ZACAS-NEXT: .LBB64_2: # %else ; RV64IA-ZACAS-NEXT: lw a0, 0(a0) ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: beqz a3, .LBB64_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: .LBB64_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) +; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB64_5 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB64_3 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a2, (a0) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB64_3 +; RV64I-ZALRSC-NEXT: .LBB64_5: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a3 +; 
RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB64_2: # %else +; RV64I-ZALRSC-NEXT: lw a3, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a3 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 693a40d..5e5f2b7 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -217,6 +217,11 @@ ; CHECK-NEXT: xsfmm64t - 'XSfmm64t' (TE=64 configuration). ; CHECK-NEXT: xsfmmbase - 'XSfmmbase' (All non arithmetic instructions for all TEWs and sf.vtzero). ; CHECK-NEXT: xsfvcp - 'XSfvcp' (SiFive Custom Vector Coprocessor Interface Instructions). +; CHECK-NEXT: xsfvfbfexp16e - 'XSfvfbfexp16e' (SiFive Vector Floating-Point Exponential Function Instruction, BFloat16). +; CHECK-NEXT: xsfvfexp16e - 'XSfvfexp16e' (SiFive Vector Floating-Point Exponential Function Instruction, Half Precision). +; CHECK-NEXT: xsfvfexp32e - 'XSfvfexp32e' (SiFive Vector Floating-Point Exponential Function Instruction, Single Precision). +; CHECK-NEXT: xsfvfexpa - 'XSfvfexpa' (SiFive Vector Floating-Point Exponential Approximation Instruction). +; CHECK-NEXT: xsfvfexpa64e - 'XSfvfexpa64e' (SiFive Vector Floating-Point Exponential Approximation Instruction with Double-Precision). ; CHECK-NEXT: xsfvfnrclipxfqf - 'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions). ; CHECK-NEXT: xsfvfwmaccqqq - 'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction (4-by-4)). ; CHECK-NEXT: xsfvqmaccdod - 'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2)). 
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll index cce1eda..1aee688 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} %"__cblayout_$Globals" = type <{ i32 }> @@ -9,7 +10,6 @@ ; CHECK: OpCapability Shader ; CHECK: OpCapability StorageTexelBufferArrayDynamicIndexingEXT - define void @main() local_unnamed_addr #0 { entry: %"$Globals.cb_h.i.i" = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 4, 0), 2, 0) @"llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_$Globalss_4_0t_2_0t"(i32 1, i32 0, i32 1, i32 0, ptr nonnull @"$Globals.str") @@ -19,4 +19,8 @@ entry: %2 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 98) store i32 99, ptr addrspace(11) %2, align 4 ret void -}
\ No newline at end of file +} + +!hlsl.cbs = !{!0} + +!0 = !{ptr @"$Globals.cb", ptr addrspace(12) @i} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll index da69a2f..163fc9d 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} %"__cblayout_$Globals" = type <{ i32 }> @@ -19,3 +20,7 @@ entry: store i32 98, ptr addrspace(11) %2, align 4 ret void } + +!hlsl.cbs = !{!0} + +!0 = !{ptr @"$Globals.cb", ptr addrspace(12) @i} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll new file mode 100644 index 0000000..7c44b6d --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll @@ -0,0 +1,43 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +; When accessing read-only `Buffer` types, SPIR-V should use `OpImageFetch` instead of `OpImageRead`. 
+; https://github.com/llvm/llvm-project/issues/162891 + +; CHECK-DAG: OpCapability SampledBuffer +; CHECK-DAG: OpCapability ImageBuffer +; CHECK-DAG: [[TypeInt:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[TypeImageBuffer:%[0-9]+]] = OpTypeImage [[TypeInt]] Buffer 2 0 0 1 Unknown +; CHECK-DAG: [[TypePtrImageBuffer:%[0-9]+]] = OpTypePointer UniformConstant [[TypeImageBuffer]] +; CHECK-DAG: [[TypeVector:%[0-9]+]] = OpTypeVector [[TypeInt]] 4 +; CHECK-DAG: [[Index:%[0-9]+]] = OpConstant [[TypeInt]] 98 +; CHECK-DAG: [[Variable:%[0-9]+]] = OpVariable [[TypePtrImageBuffer]] UniformConstant +@.str = private unnamed_addr constant [7 x i8] c"rwbuff\00", align 1 +@.str.2 = private unnamed_addr constant [5 x i8] c"buff\00", align 1 +@.str.4 = private unnamed_addr constant [8 x i8] c"unknown\00", align 1 + +define void @main() local_unnamed_addr #0 { + %1 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_2_33t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %2 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_1_0t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @.str.2) + %3 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 0, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_0_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @.str.4) + %4 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_1_0t(target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) %2, i32 98) +; CHECK: [[Load:%[0-9]+]] = OpLoad [[TypeImageBuffer]] [[Variable]] +; CHECK: [[ImageFetch:%[0-9]+]] = OpImageFetch [[TypeVector]] [[Load]] [[Index]] +; CHECK: {{.*}} = OpCompositeExtract [[TypeInt]] [[ImageFetch]] 0 + %5 = load i32, ptr addrspace(11) %4, align 4 + %6 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) 
@llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 99) + store i32 %5, ptr addrspace(11) %6, align 4 + %7 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 96) +; CHECK: {{%[0-9]+}} = OpLoad {{.*}} +; CHECK: {{%[0-9]+}} = OpImageRead {{.*}} + %8 = load i32, ptr addrspace(11) %7, align 4 + %9 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 97) + store i32 %8, ptr addrspace(11) %9, align 4 + %10 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_0_0t(target("spirv.Image", i32, 5, 2, 0, 0, 0, 0) %3, i32 94) +; CHECK: {{%[0-9]+}} = OpLoad {{.*}} +; CHECK: {{%[0-9]+}} = OpImageRead {{.*}} + %11 = load i32, ptr addrspace(11) %10, align 4 + %12 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 95) + store i32 %11, ptr addrspace(11) %12, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll b/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll index 4a38d7a..c87f113 100644 --- a/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll @@ -1,7 +1,7 @@ ; Test that combined sin/cos library call is emitted when appropriate ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefix=CHECK-OPT -; RUN: llc < %s -mtriple=s390x-linux-gnu -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-OPT +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefix=CHECK-OPT define float @f1(float %x) { ; CHECK-OPT-LABEL: f1: diff --git 
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-vs-unpredicated-copy.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-vs-unpredicated-copy.mir new file mode 100644 index 0000000..5783133 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-vs-unpredicated-copy.mir @@ -0,0 +1,146 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s + +# From bug #162644. The _wrong_ output of this test is to generate the +# body of the tail-predicated loop like this: +# +# $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, $noreg, undef $q2 +# renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, renamable $lr :: (load unknown-size from %ir.13, align 4) +# $q0 = MVE_VORR $q1, $q1, 0, $noreg, $noreg, undef $q0 +# renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 0, killed $noreg, renamable $lr, killed renamable $q0 +# $lr = MVE_LETP killed renamable $lr, %bb.1 +# +# in which the second MVE_VORR, copying q1 into q0, is an invalid conversion of +# the input MQPRCopy, because it won't copy the vector lanes disabled by +# FPSCR.LTPSIZE, and those are needed in the output value of the loop. +# +# In the right output, that MQPRCopy is expanded into a pair of VMOVD copying +# d2,d3 into d0,d1 respectively, which are unaffected by LTPSIZE. 
+ +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-unknown-none-eabihf" + + @inactive = dso_local local_unnamed_addr global <4 x float> zeroinitializer, align 16 + + define <4 x float> @test_func(ptr %0, i32 %1) { + %3 = load <4 x float>, ptr @inactive, align 16 + %4 = add i32 %1, 3 + %5 = call i32 @llvm.smin.i32(i32 %1, i32 4) + %6 = sub i32 %4, %5 + %7 = lshr i32 %6, 2 + %8 = add nuw nsw i32 %7, 1 + %9 = call i32 @llvm.start.loop.iterations.i32(i32 %8) + br label %10 + + 10: ; preds = %10, %2 + %11 = phi <4 x float> [ splat (float 0x3FB99999A0000000), %2 ], [ %17, %10 ] + %12 = phi i32 [ %1, %2 ], [ %19, %10 ] + %13 = phi ptr [ %0, %2 ], [ %18, %10 ] + %14 = phi i32 [ %9, %2 ], [ %20, %10 ] + %15 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %12) + %16 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %13, i32 4, <4 x i1> %15, <4 x float> zeroinitializer) + %17 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %11, <4 x float> %16, <4 x i1> %15, <4 x float> %3) + %18 = getelementptr inbounds nuw i8, ptr %13, i32 16 + %19 = add i32 %12, -4 + %20 = call i32 @llvm.loop.decrement.reg.i32(i32 %14, i32 1) + %21 = icmp ne i32 %20, 0 + br i1 %21, label %10, label %22 + + 22: ; preds = %10 + ret <4 x float> %17 + } +... 
+--- +name: test_func +alignment: 4 +legalized: false +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + ; CHECK-LABEL: name: test_func + ; CHECK: bb.0 (%ir-block.2): + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $lr, $r0, $r1, $r7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK-NEXT: $r2 = t2MOVi16 target-flags(arm-lo16) @inactive, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @inactive, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $q1 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg, $noreg :: (dereferenceable load (s128) from @inactive) + ; CHECK-NEXT: $r3 = t2MOVi16 52429, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $r3 = t2MOVTi16 killed $r3, 15820, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $q0 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 + ; CHECK-NEXT: $lr = MVE_DLSTP_32 killed renamable $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (%ir-block.10, align 4): + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: liveins: $lr, $d2, $d3, $q0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $q2 = MVE_VORR 
killed $q0, killed $q0, 0, $noreg, $noreg, undef $q2 + ; CHECK-NEXT: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, renamable $lr :: (load unknown-size from %ir.13, align 4) + ; CHECK-NEXT: $d0 = VMOVD $d2, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $d1 = VMOVD $d3, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 0, killed $noreg, renamable $lr, killed renamable $q0 + ; CHECK-NEXT: $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2 (%ir-block.22): + ; CHECK-NEXT: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $q0 + bb.0 (%ir-block.2): + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $r2 = t2MOVi16 target-flags(arm-lo16) @inactive, 14 /* CC::al */, $noreg + tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @inactive, 14 /* CC::al */, $noreg + renamable $r3 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + renamable $q1 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg, $noreg :: (dereferenceable load (s128) from @inactive) + $r2 = tMOVr $r1, 14 /* CC::al */, $noreg + t2IT 10, 8, implicit-def $itstate + renamable $r2 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r2, implicit killed $itstate + renamable $r2, dead $cpsr = tSUBrr renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg + renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 3, 14 /* CC::al */, $noreg + renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg + $r3 = t2MOVi16 
52429, 14 /* CC::al */, $noreg + $r3 = t2MOVTi16 killed $r3, 15820, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 + renamable $lr = t2DoLoopStartTP killed renamable $r2, renamable $r1 + + bb.1 (%ir-block.10, align 4): + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $q1, $r0, $r1 + + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg, $noreg + $q2 = MQPRCopy killed $q0 + MVE_VPST 8, implicit $vpr + renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, renamable $lr :: (load unknown-size from %ir.13, align 4) + $q0 = MQPRCopy $q1 + MVE_VPST 8, implicit $vpr + renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 1, killed renamable $vpr, renamable $lr, killed renamable $q0 + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2 (%ir-block.22): + liveins: $q0 + + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $q0 +... 
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll index 053d6a1..d741411 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll @@ -94,5 +94,5 @@ attributes #1 = { minsize nofree norecurse nounwind optsize } !llvm.module.flags = !{!0, !1, !2} !0 = !{i32 8, !"branch-target-enforcement", i32 0} -!1 = !{i32 8, !"sign-return-address", i32 1} +!1 = !{i32 8, !"sign-return-address", i32 2} !2 = !{i32 8, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/llvm/test/CodeGen/X86/2006-05-22-FPSetEQ.ll index bea11e9..940fe8c 100644 --- a/llvm/test/CodeGen/X86/2006-05-22-FPSetEQ.ll +++ b/llvm/test/CodeGen/X86/2006-05-22-FPSetEQ.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=i686-- -mattr=-sse | FileCheck %s -check-prefix=WITHNANS -; RUN: llc < %s -mtriple=i686-- -mattr=-sse -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s -check-prefix=NONANS +; RUN: llc < %s -mtriple=i686-- -mattr=-sse -enable-no-nans-fp-math | FileCheck %s -check-prefix=NONANS ; WITHNANS-LABEL: test: ; WITHNANS: setnp diff --git a/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll b/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll index 8411a40..ff7a99a 100644 --- a/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll +++ b/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -enable-unsafe-fp-math -mtriple=i686-- | FileCheck %s +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; rdar://5902801 declare void @test2() diff --git a/llvm/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll b/llvm/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll index 6ebbb2e..0e0e20f 100644 --- a/llvm/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll +++ b/llvm/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s 
-enable-unsafe-fp-math +; RUN: llc < %s ; <rdar://problem/12180135> target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" target triple = "i386-apple-macosx10.8.0" diff --git a/llvm/test/CodeGen/X86/avx-minmax.ll b/llvm/test/CodeGen/X86/avx-minmax.ll index 6da04c5..8e4b6c6 100644 --- a/llvm/test/CodeGen/X86/avx-minmax.ll +++ b/llvm/test/CodeGen/X86/avx-minmax.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -enable-no-nans-fp-math | FileCheck %s define <2 x double> @maxpd(<2 x double> %x, <2 x double> %y) { ; CHECK-LABEL: maxpd: diff --git a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll index f827998..eb9de8a 100644 --- a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll +++ b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK_UNSAFE ; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -mattr=+avx512f | FileCheck %s ; RUN: llc < %s -mtriple=x86_64 -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f | FileCheck %s +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s ; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s define <16 x float> @test_max_v16f32(ptr %a_ptr, <16 x float> %b) { diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll index 5d9784a..1147d79 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll @@ 
-1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce, <32 x half> %rhs.coerce) { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll index b58bae9..1c4d9c6 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll index 92bdebb..a8ff969 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll 
b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll new file mode 100644 index 0000000..13149d7 --- /dev/null +++ b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll @@ -0,0 +1,3021 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=CHECK,AVX512 + +; +; CTPOP +; + +define i32 @test_ctpop_i128(i128 %a0) nounwind { +; CHECK-LABEL: test_ctpop_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq %rsi, %rcx +; CHECK-NEXT: popcntq %rdi, %rax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %cnt = call i128 @llvm.ctpop.i128(i128 %a0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i128(ptr %p0) nounwind { +; CHECK-LABEL: load_ctpop_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq 8(%rdi), %rcx +; CHECK-NEXT: popcntq (%rdi), %rax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %a0 = load i128, ptr %p0 + %cnt = call i128 @llvm.ctpop.i128(i128 %a0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctpop_i256(i256 %a0) nounwind { +; CHECK-LABEL: test_ctpop_i256: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq %rcx, %rax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: popcntq %rdx, %rcx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: popcntq %rsi, %rdx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %rdi, %rax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed 
$eax killed $rax +; CHECK-NEXT: retq + %cnt = call i256 @llvm.ctpop.i256(i256 %a0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i256(ptr %p0) nounwind { +; SSE-LABEL: load_ctpop_i256: +; SSE: # %bb.0: +; SSE-NEXT: popcntq 24(%rdi), %rcx +; SSE-NEXT: popcntq 16(%rdi), %rdx +; SSE-NEXT: popcntq 8(%rdi), %rsi +; SSE-NEXT: popcntq (%rdi), %rax +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: addl %esi, %eax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctpop_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: popcntq 24(%rdi), %rax +; AVX2-NEXT: popcntq 16(%rdi), %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: popcntq 8(%rdi), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq (%rdi), %rax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: addl %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctpop_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: popcntq 24(%rdi), %rax +; AVX512-NEXT: popcntq 16(%rdi), %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: popcntq 8(%rdi), %rdx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq (%rdi), %rax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i256, ptr %p0 + %cnt = call i256 @llvm.ctpop.i256(i256 %a0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctpop_i512(i512 %a0) nounwind { +; CHECK-LABEL: test_ctpop_i512: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: addl %eax, %r10d +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %r9, %rax +; CHECK-NEXT: popcntq %r8, %r8 +; CHECK-NEXT: addl %eax, %r8d +; CHECK-NEXT: addl %r10d, %r8d +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %rcx, %rax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: popcntq %rdx, %rcx +; CHECK-NEXT: addl 
%eax, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: popcntq %rsi, %rdx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %rdi, %rax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %r8d, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %cnt = call i512 @llvm.ctpop.i512(i512 %a0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i512(ptr %p0) nounwind { +; SSE-LABEL: load_ctpop_i512: +; SSE: # %bb.0: +; SSE-NEXT: popcntq 56(%rdi), %rax +; SSE-NEXT: popcntq 48(%rdi), %rcx +; SSE-NEXT: popcntq 40(%rdi), %rdx +; SSE-NEXT: popcntq 32(%rdi), %rsi +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 24(%rdi), %rax +; SSE-NEXT: addl %ecx, %esi +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: popcntq 16(%rdi), %rcx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 8(%rdi), %rdx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq (%rdi), %rax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: addl %ecx, %eax +; SSE-NEXT: addl %esi, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctpop_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: popcntq 56(%rdi), %rax +; AVX2-NEXT: popcntq 48(%rdi), %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 40(%rdi), %rax +; AVX2-NEXT: popcntq 32(%rdi), %rdx +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: addl %ecx, %edx +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: popcntq 24(%rdi), %rcx +; AVX2-NEXT: popcntq 16(%rdi), %rsi +; AVX2-NEXT: popcntq 8(%rdi), %r8 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq (%rdi), %rax +; AVX2-NEXT: addl %ecx, %esi +; AVX2-NEXT: addl %r8d, %eax +; AVX2-NEXT: addl %esi, %eax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctpop_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: popcntq 56(%rdi), %rax +; 
AVX512-NEXT: popcntq 48(%rdi), %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 40(%rdi), %rax +; AVX512-NEXT: popcntq 32(%rdi), %rdx +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: addl %ecx, %edx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 24(%rdi), %rax +; AVX512-NEXT: xorl %ecx, %ecx +; AVX512-NEXT: popcntq 16(%rdi), %rcx +; AVX512-NEXT: popcntq 8(%rdi), %rsi +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq (%rdi), %rax +; AVX512-NEXT: addl %esi, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i512, ptr %p0 + %cnt = call i512 @llvm.ctpop.i512(i512 %a0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctpop_i1024(i1024 %a0) nounwind { +; SSE-LABEL: test_ctpop_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbx +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: addl %eax, %r10d +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: addl %r11d, %eax +; SSE-NEXT: xorl %r11d, %r11d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: xorl %ebx, %ebx +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: addl %r10d, %eax +; SSE-NEXT: addl %r11d, %ebx +; SSE-NEXT: xorl %r11d, %r11d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: xorl %r10d, %r10d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: addl %r11d, %r10d +; SSE-NEXT: addl %ebx, %r10d +; SSE-NEXT: xorl %r11d, %r11d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: xorl %ebx, %ebx +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: addl %eax, %r10d +; SSE-NEXT: addl %r11d, %ebx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq %r9, %rax +; SSE-NEXT: popcntq %r8, %r8 +; SSE-NEXT: addl %eax, %r8d +; SSE-NEXT: addl %ebx, %r8d +; SSE-NEXT: 
xorl %eax, %eax +; SSE-NEXT: popcntq %rcx, %rax +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: popcntq %rdx, %rcx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq %rsi, %rdx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq %rdi, %rax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: addl %ecx, %eax +; SSE-NEXT: addl %r8d, %eax +; SSE-NEXT: addl %r10d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctpop_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: addl %eax, %r10d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: addl %eax, %r11d +; AVX2-NEXT: addl %r10d, %r11d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r14 +; AVX2-NEXT: addl %eax, %ebx +; AVX2-NEXT: xorl %r10d, %r10d +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: addl %r14d, %r10d +; AVX2-NEXT: addl %ebx, %r10d +; AVX2-NEXT: addl %r11d, %r10d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: xorl %r11d, %r11d +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: addl %eax, %r11d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq %r9, %rax +; AVX2-NEXT: popcntq %r8, %r8 +; AVX2-NEXT: addl %eax, %r8d +; AVX2-NEXT: addl %r11d, %r8d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq %rcx, %rax +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: popcntq %rdx, %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: popcntq %rsi, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq %rdi, %rax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: addl %ecx, %eax +; 
AVX2-NEXT: addl %r8d, %eax +; AVX2-NEXT: addl %r10d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctpop_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: addl %eax, %r10d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: addl %eax, %r11d +; AVX512-NEXT: addl %r10d, %r11d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: xorl %ebx, %ebx +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: xorl %r14d, %r14d +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r14 +; AVX512-NEXT: addl %eax, %ebx +; AVX512-NEXT: xorl %r10d, %r10d +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: addl %r14d, %r10d +; AVX512-NEXT: addl %ebx, %r10d +; AVX512-NEXT: addl %r11d, %r10d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: xorl %r11d, %r11d +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: addl %eax, %r11d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq %r9, %rax +; AVX512-NEXT: popcntq %r8, %r8 +; AVX512-NEXT: addl %eax, %r8d +; AVX512-NEXT: addl %r11d, %r8d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq %rcx, %rax +; AVX512-NEXT: xorl %ecx, %ecx +; AVX512-NEXT: popcntq %rdx, %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: popcntq %rsi, %rdx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq %rdi, %rax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: addl %r8d, %eax +; AVX512-NEXT: addl %r10d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %cnt = call 
i1024 @llvm.ctpop.i1024(i1024 %a0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i1024(ptr %p0) nounwind { +; SSE-LABEL: load_ctpop_i1024: +; SSE: # %bb.0: +; SSE-NEXT: popcntq 120(%rdi), %rax +; SSE-NEXT: popcntq 112(%rdi), %rcx +; SSE-NEXT: popcntq 104(%rdi), %rdx +; SSE-NEXT: popcntq 96(%rdi), %rsi +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 88(%rdi), %rax +; SSE-NEXT: addl %ecx, %esi +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 80(%rdi), %rdx +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 72(%rdi), %rax +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: popcntq 64(%rdi), %rcx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: addl %edx, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 56(%rdi), %rax +; SSE-NEXT: addl %esi, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 48(%rdi), %rdx +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 40(%rdi), %rax +; SSE-NEXT: xorl %esi, %esi +; SSE-NEXT: popcntq 32(%rdi), %rsi +; SSE-NEXT: addl %eax, %esi +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 24(%rdi), %rax +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 16(%rdi), %rdx +; SSE-NEXT: popcntq 8(%rdi), %r8 +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq (%rdi), %rax +; SSE-NEXT: addl %r8d, %eax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: addl %esi, %eax +; SSE-NEXT: addl %ecx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctpop_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: popcntq 120(%rdi), %rax +; AVX2-NEXT: popcntq 112(%rdi), %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 104(%rdi), %rax +; AVX2-NEXT: popcntq 96(%rdi), %rdx +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: addl %ecx, %edx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 88(%rdi), %rax +; AVX2-NEXT: 
popcntq 80(%rdi), %rsi +; AVX2-NEXT: popcntq 72(%rdi), %r8 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: popcntq 64(%rdi), %rcx +; AVX2-NEXT: addl %eax, %esi +; AVX2-NEXT: addl %r8d, %ecx +; AVX2-NEXT: addl %esi, %ecx +; AVX2-NEXT: addl %edx, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 56(%rdi), %rax +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: popcntq 48(%rdi), %rdx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: popcntq 40(%rdi), %rsi +; AVX2-NEXT: xorl %r8d, %r8d +; AVX2-NEXT: popcntq 32(%rdi), %r8 +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: addl %esi, %r8d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 24(%rdi), %rax +; AVX2-NEXT: addl %edx, %r8d +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: popcntq 16(%rdi), %rdx +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: popcntq 8(%rdi), %rsi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq (%rdi), %rax +; AVX2-NEXT: addl %esi, %eax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: addl %r8d, %eax +; AVX2-NEXT: addl %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctpop_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: popcntq 120(%rdi), %rax +; AVX512-NEXT: popcntq 112(%rdi), %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 104(%rdi), %rax +; AVX512-NEXT: popcntq 96(%rdi), %rdx +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: addl %ecx, %edx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 88(%rdi), %rax +; AVX512-NEXT: popcntq 80(%rdi), %rsi +; AVX512-NEXT: popcntq 72(%rdi), %r8 +; AVX512-NEXT: addl %eax, %esi +; AVX512-NEXT: xorl %ecx, %ecx +; AVX512-NEXT: popcntq 64(%rdi), %rcx +; AVX512-NEXT: addl %r8d, %ecx +; AVX512-NEXT: addl %esi, %ecx +; AVX512-NEXT: addl %edx, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 56(%rdi), %rax +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: popcntq 48(%rdi), %rdx +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: popcntq 40(%rdi), %rsi +; 
AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: xorl %r8d, %r8d +; AVX512-NEXT: popcntq 32(%rdi), %r8 +; AVX512-NEXT: addl %esi, %r8d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 24(%rdi), %rax +; AVX512-NEXT: addl %edx, %r8d +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: popcntq 16(%rdi), %rdx +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: popcntq 8(%rdi), %rsi +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq (%rdi), %rax +; AVX512-NEXT: addl %esi, %eax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: addl %r8d, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i1024, ptr %p0 + %cnt = call i1024 @llvm.ctpop.i1024(i1024 %a0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +; +; CTLZ +; + +define i32 @test_ctlz_i128(i128 %a0) nounwind { +; SSE-LABEL: test_ctlz_i128: +; SSE: # %bb.0: +; SSE-NEXT: bsrq %rsi, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: lzcntq %rsi, %rcx +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: lzcntq %rsi, %rcx +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i128(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i128: +; 
SSE: # %bb.0: +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: bsrq %rcx, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movq 8(%rdi), %rcx +; AVX2-NEXT: lzcntq %rcx, %rdx +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movq 8(%rdi), %rcx +; AVX512-NEXT: lzcntq %rcx, %rdx +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i128, ptr %p0 + %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctlz_i256(i256 %a0) nounwind { +; SSE-LABEL: test_ctlz_i256: +; SSE: # %bb.0: +; SSE-NEXT: bsrq %rcx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rdx, %r8 +; SSE-NEXT: xorl $63, %r8d +; SSE-NEXT: orl $64, %r8d +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %eax, %r8d +; SSE-NEXT: bsrq %rsi, %r9 +; SSE-NEXT: xorl $63, %r9d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmovnel %r8d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: lzcntq %rcx, %rax +; AVX2-NEXT: lzcntq %rdx, %r8 +; AVX2-NEXT: addl $64, %r8d +; AVX2-NEXT: testq 
%rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %r8d +; AVX2-NEXT: lzcntq %rsi, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: cmovnel %r8d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: lzcntq %rcx, %rax +; AVX512-NEXT: lzcntq %rdx, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: lzcntq %rsi, %r9 +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rcx, %rdx +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i256(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i256: +; SSE: # %bb.0: +; SSE-NEXT: movq 16(%rdi), %rcx +; SSE-NEXT: movq 24(%rdi), %rdx +; SSE-NEXT: bsrq %rdx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rcx, %rsi +; SSE-NEXT: xorl $63, %esi +; SSE-NEXT: orl $64, %esi +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %esi +; SSE-NEXT: movq 8(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %r9 +; SSE-NEXT: xorl $63, %r9d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: cmovnel %esi, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: movq 16(%rdi), %rcx +; AVX2-NEXT: movq 24(%rdi), %rdx +; AVX2-NEXT: lzcntq %rdx, 
%rax +; AVX2-NEXT: lzcntq %rcx, %rsi +; AVX2-NEXT: addl $64, %esi +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %esi +; AVX2-NEXT: movq 8(%rdi), %r8 +; AVX2-NEXT: lzcntq %r8, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: movq 8(%rdi), %rcx +; AVX512-NEXT: movq 16(%rdi), %rdx +; AVX512-NEXT: movq 24(%rdi), %rsi +; AVX512-NEXT: lzcntq %rsi, %rax +; AVX512-NEXT: lzcntq %rdx, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: lzcntq %rcx, %r9 +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rsi, %rdx +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i256, ptr %p0 + %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctlz_i512(i512 %a0) nounwind { +; SSE-LABEL: test_ctlz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: bsrq %r11, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r10, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %r9, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r8, %rbx +; SSE-NEXT: xorl $63, %ebx +; SSE-NEXT: orl $64, %ebx +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: cmovnel %eax, %ebx +; SSE-NEXT: subl $-128, 
%ebx +; SSE-NEXT: movq %r10, %rax +; SSE-NEXT: orq %r11, %rax +; SSE-NEXT: cmovnel %r14d, %ebx +; SSE-NEXT: bsrq %rcx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rdx, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %rsi, %r15 +; SSE-NEXT: xorl $63, %r15d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r11, %r9 +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: orq %r9, %r8 +; SSE-NEXT: cmovnel %ebx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: lzcntq %r11, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: lzcntq %r10, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %r11, %r11 +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %r9, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: lzcntq %r8, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %r9, %r9 +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: subl $-128, %ebx +; AVX2-NEXT: movq %r10, %rax +; AVX2-NEXT: orq %r11, %rax +; AVX2-NEXT: cmovnel %r14d, %ebx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rcx, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: lzcntq %rdx, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: lzcntq %rsi, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq 
%rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r11, %r9 +; AVX2-NEXT: orq %r10, %r8 +; AVX2-NEXT: orq %r9, %r8 +; AVX2-NEXT: cmovnel %ebx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: lzcntq %r11, %rax +; AVX512-NEXT: lzcntq %r10, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq %r9, %rax +; AVX512-NEXT: lzcntq %r8, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: subl $-128, %ebx +; AVX512-NEXT: movq %r10, %rax +; AVX512-NEXT: orq %r11, %rax +; AVX512-NEXT: cmovnel %r14d, %ebx +; AVX512-NEXT: lzcntq %rcx, %rax +; AVX512-NEXT: lzcntq %rdx, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq %rsi, %r15 +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %r15d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rcx, %rdx +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r11, %r9 +; AVX512-NEXT: orq %r10, %r8 +; AVX512-NEXT: orq %r9, %r8 +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: retq + %cnt = call i512 @llvm.ctlz.i512(i512 
%a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i512(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 8(%rdi), %r10 +; SSE-NEXT: movq 16(%rdi), %r9 +; SSE-NEXT: movq 32(%rdi), %rcx +; SSE-NEXT: movq 40(%rdi), %rdx +; SSE-NEXT: movq 48(%rdi), %rsi +; SSE-NEXT: movq 56(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rsi, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %rdx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rcx, %r11 +; SSE-NEXT: xorl $63, %r11d +; SSE-NEXT: orl $64, %r11d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: movq 24(%rdi), %rbx +; SSE-NEXT: subl $-128, %r11d +; SSE-NEXT: movq %rsi, %rax +; SSE-NEXT: orq %r8, %rax +; SSE-NEXT: cmovnel %r14d, %r11d +; SSE-NEXT: bsrq %rbx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r9, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %rbx, %rbx +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %r10, %r15 +; SSE-NEXT: xorl $63, %r15d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rbx, %r9 +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: orq %rsi, %rcx +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: cmovnel %r11d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 8(%rdi), %r10 +; AVX2-NEXT: movq 
16(%rdi), %r9 +; AVX2-NEXT: movq 32(%rdi), %rcx +; AVX2-NEXT: movq 40(%rdi), %rdx +; AVX2-NEXT: movq 48(%rdi), %rsi +; AVX2-NEXT: movq 56(%rdi), %r8 +; AVX2-NEXT: lzcntq %r8, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: lzcntq %rsi, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: lzcntq %rcx, %r11 +; AVX2-NEXT: addl $64, %r11d +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %r11d +; AVX2-NEXT: subl $-128, %r11d +; AVX2-NEXT: movq %rsi, %rax +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: cmovnel %ebx, %r11d +; AVX2-NEXT: movq 24(%rdi), %rbx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rbx, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: lzcntq %r9, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: lzcntq %r10, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rbx, %r9 +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r8, %rdx +; AVX2-NEXT: orq %rsi, %rcx +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: cmovnel %r11d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 8(%rdi), %r11 +; AVX512-NEXT: movq 16(%rdi), %r9 +; AVX512-NEXT: movq 24(%rdi), %r10 +; AVX512-NEXT: movq 32(%rdi), %rcx +; AVX512-NEXT: movq 40(%rdi), %rdx +; AVX512-NEXT: movq 48(%rdi), %rsi +; AVX512-NEXT: movq 56(%rdi), %r8 +; AVX512-NEXT: lzcntq %r8, %rax +; AVX512-NEXT: lzcntq %rsi, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: 
testq %r8, %r8 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq %rdx, %rax +; AVX512-NEXT: lzcntq %rcx, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: subl $-128, %ebx +; AVX512-NEXT: movq %rsi, %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: cmovnel %r14d, %ebx +; AVX512-NEXT: lzcntq %r10, %rax +; AVX512-NEXT: lzcntq %r9, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: lzcntq %r11, %rdi +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r10, %r9 +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r8, %rdx +; AVX512-NEXT: orq %rsi, %rcx +; AVX512-NEXT: orq %rdx, %rcx +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %a0 = load i512, ptr %p0 + %cnt = call i512 @llvm.ctlz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctlz_i1024(i1024 %a0) nounwind { +; SSE-LABEL: test_ctlz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq %r9, %r11 +; SSE-NEXT: movq %r8, %r9 +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq %rdx, %r12 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: bsrq %r8, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r15, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: 
cmovnel %eax, %ecx +; SSE-NEXT: bsrq %r14, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: bsrq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: orl $64, %eax +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %r15, %rdx +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: movq %r8, %r14 +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: bsrq %r13, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: bsrq %rbx, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: orl $64, %edx +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %ecx, %edx +; SSE-NEXT: bsrq %r10, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: bsrq %r8, %rbp +; SSE-NEXT: xorl $63, %ebp +; SSE-NEXT: orl $64, %ebp +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %ecx, %ebp +; SSE-NEXT: subl $-128, %ebp +; SSE-NEXT: movq %rbx, %rcx +; SSE-NEXT: orq %r13, %rcx +; SSE-NEXT: cmovnel %edx, %ebp +; SSE-NEXT: addl $256, %ebp # imm = 0x100 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; SSE-NEXT: orq %r14, %rcx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: orq %r15, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmovnel %eax, %ebp +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; SSE-NEXT: bsrq %r14, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; SSE-NEXT: bsrq %r15, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: bsrq %r11, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r9, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: orl $64, %edx +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %eax, %edx +; SSE-NEXT: subl $-128, %edx +; SSE-NEXT: movq %r15, %rax +; SSE-NEXT: orq %r14, %rax +; SSE-NEXT: cmovnel %ecx, %edx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: bsrq %r15, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r12, %rcx +; SSE-NEXT: 
xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r15, %r15 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: bsrq %rsi, %rdi +; SSE-NEXT: xorl $63, %edi +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %edi, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r15, %r12 +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq %r14, %r11 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r11, %r9 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: orq %r13, %r10 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: orq %rbx, %r8 +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: cmovnel %ebp, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq %r9, %r14 +; AVX2-NEXT: movq %r8, %r11 +; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r12, %rcx +; AVX2-NEXT: xorl %r9d, %r9d +; AVX2-NEXT: lzcntq %r8, %r9 +; AVX2-NEXT: addl $64, %r9d +; AVX2-NEXT: testq 
%r12, %r12 +; AVX2-NEXT: cmovnel %ecx, %r9d +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: lzcntq %r10, %rsi +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %rax, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %esi, %ecx +; AVX2-NEXT: subl $-128, %ecx +; AVX2-NEXT: movq %r8, %rsi +; AVX2-NEXT: orq %r12, %rsi +; AVX2-NEXT: cmovnel %r9d, %ecx +; AVX2-NEXT: xorl %edi, %edi +; AVX2-NEXT: lzcntq %rbx, %rdi +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: lzcntq %r15, %rsi +; AVX2-NEXT: addl $64, %esi +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %edi, %esi +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: lzcntq %r13, %rbp +; AVX2-NEXT: addl $64, %ebp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; AVX2-NEXT: xorl %edi, %edi +; AVX2-NEXT: lzcntq %r9, %rdi +; AVX2-NEXT: testq %r9, %r9 +; AVX2-NEXT: cmovnel %edi, %ebp +; AVX2-NEXT: subl $-128, %ebp +; AVX2-NEXT: movq %r15, %rdi +; AVX2-NEXT: orq %rbx, %rdi +; AVX2-NEXT: cmovnel %esi, %ebp +; AVX2-NEXT: addl $256, %ebp # imm = 0x100 +; AVX2-NEXT: movq %r10, %rdi +; AVX2-NEXT: orq %r12, %rdi +; AVX2-NEXT: movq %rax, %rsi +; AVX2-NEXT: orq %r8, %rsi +; AVX2-NEXT: orq %rdi, %rsi +; AVX2-NEXT: cmovnel %ecx, %ebp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r12, %rcx +; AVX2-NEXT: testq %r12, %r12 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r11, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: lzcntq %r14, %rsi +; AVX2-NEXT: testq %r14, %r14 +; AVX2-NEXT: cmovnel %esi, %ecx +; AVX2-NEXT: subl $-128, %ecx +; AVX2-NEXT: movq %rdi, %rsi +; AVX2-NEXT: orq %r12, %rsi +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: movq %rdx, %rdi +; AVX2-NEXT: lzcntq %rdx, %rdx +; AVX2-NEXT: addl $64, %edx +; AVX2-NEXT: movq 
{{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %r10, %rax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %eax, %edx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: lzcntq %rax, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; AVX2-NEXT: lzcntq %rsi, %r8 +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %r8d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r10, %rdi +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: orq %r12, %r14 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r14, %r11 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; AVX2-NEXT: orq %rbx, %r9 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r15 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: orq %r15, %r13 +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r9, %r13 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq %r9, %r14 +; AVX512-NEXT: movq %r8, %r11 +; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX512-NEXT: movq 
{{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: lzcntq %r12, %rcx +; AVX512-NEXT: lzcntq %r8, %r9 +; AVX512-NEXT: addl $64, %r9d +; AVX512-NEXT: testq %r12, %r12 +; AVX512-NEXT: cmovnel %ecx, %r9d +; AVX512-NEXT: lzcntq %r10, %rsi +; AVX512-NEXT: lzcntq %rax, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %esi, %ecx +; AVX512-NEXT: subl $-128, %ecx +; AVX512-NEXT: movq %r8, %rsi +; AVX512-NEXT: orq %r12, %rsi +; AVX512-NEXT: cmovnel %r9d, %ecx +; AVX512-NEXT: lzcntq %rbx, %rdi +; AVX512-NEXT: lzcntq %r15, %rsi +; AVX512-NEXT: addl $64, %esi +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %edi, %esi +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: lzcntq %r13, %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; AVX512-NEXT: lzcntq %r9, %rdi +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: cmovnel %edi, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %r15, %rdi +; AVX512-NEXT: orq %rbx, %rdi +; AVX512-NEXT: cmovnel %esi, %ebp +; AVX512-NEXT: addl $256, %ebp # imm = 0x100 +; AVX512-NEXT: movq %r10, %rdi +; AVX512-NEXT: orq %r12, %rdi +; AVX512-NEXT: movq %rax, %rsi +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rdi, %rsi +; AVX512-NEXT: cmovnel %ecx, %ebp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: lzcntq %r12, %rcx +; AVX512-NEXT: testq %r12, %r12 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: lzcntq %r11, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: lzcntq %r14, %rsi +; AVX512-NEXT: testq %r14, %r14 +; AVX512-NEXT: cmovnel %esi, %ecx +; AVX512-NEXT: subl $-128, %ecx +; AVX512-NEXT: movq %rdi, %rsi +; AVX512-NEXT: orq %r12, %rsi +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: movq %rdx, %rdi +; AVX512-NEXT: lzcntq %rdx, %rdx +; AVX512-NEXT: addl $64, %edx +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; 
AVX512-NEXT: lzcntq %r10, %rax +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %eax, %edx +; AVX512-NEXT: lzcntq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; AVX512-NEXT: lzcntq %rsi, %r8 +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r10, %rdi +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: orq %r12, %r14 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r14, %r11 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; AVX512-NEXT: orq %rbx, %r9 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r15 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: orq %r15, %r13 +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r9, %r13 +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %cnt = call i1024 @llvm.ctlz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i1024(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 40(%rdi), %rbp +; SSE-NEXT: movq 64(%rdi), %rbx +; SSE-NEXT: movq 72(%rdi), %r11 +; SSE-NEXT: movq 80(%rdi), %r12 +; SSE-NEXT: movq 88(%rdi), %r14 +; SSE-NEXT: movq 96(%rdi), %rsi +; SSE-NEXT: movq 104(%rdi), %r9 +; SSE-NEXT: movq 112(%rdi), %r10 +; SSE-NEXT: movq 120(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r10, %rcx +; SSE-NEXT: xorl $63, %ecx +; 
SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: bsrq %r9, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: bsrq %rsi, %rax +; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: orl $64, %eax +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %r10, %rdx +; SSE-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: bsrq %r14, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: bsrq %r12, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: orl $64, %edx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %ecx, %edx +; SSE-NEXT: bsrq %r11, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: bsrq %rbx, %r15 +; SSE-NEXT: xorl $63, %r15d +; SSE-NEXT: orl $64, %r15d +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %ecx, %r15d +; SSE-NEXT: subl $-128, %r15d +; SSE-NEXT: movq %r12, %rcx +; SSE-NEXT: orq %r14, %rcx +; SSE-NEXT: cmovnel %edx, %r15d +; SSE-NEXT: movq 48(%rdi), %r12 +; SSE-NEXT: addl $256, %r15d # imm = 0x100 +; SSE-NEXT: movq %r9, %rcx +; SSE-NEXT: orq %r8, %rcx +; SSE-NEXT: movq %rsi, %rdx +; SSE-NEXT: orq %r10, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: movq 56(%rdi), %r13 +; SSE-NEXT: cmovnel %eax, %r15d +; SSE-NEXT: bsrq %r13, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r12, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movq %rbp, %r10 +; SSE-NEXT: bsrq %rbp, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: movq 32(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %rbp +; SSE-NEXT: xorl $63, %ebp +; SSE-NEXT: orl $64, %ebp +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %eax, %ebp 
+; SSE-NEXT: subl $-128, %ebp +; SSE-NEXT: movq %r12, %rax +; SSE-NEXT: orq %r13, %rax +; SSE-NEXT: cmovnel %ecx, %ebp +; SSE-NEXT: movq 24(%rdi), %r9 +; SSE-NEXT: bsrq %r9, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: movq 16(%rdi), %rsi +; SSE-NEXT: bsrq %rsi, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rdi +; SSE-NEXT: bsrq %rdi, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %rsi +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq %r13, %r10 +; SSE-NEXT: orq %r12, %r8 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: cmovnel %ebp, %eax +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; SSE-NEXT: orq %r14, %r11 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; SSE-NEXT: orq %rcx, %rbx +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r11, %rbx +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 48(%rdi), %r9 +; AVX2-NEXT: movq 56(%rdi), %rbp +; AVX2-NEXT: movq 64(%rdi), %r11 +; AVX2-NEXT: movq 72(%rdi), %r10 +; AVX2-NEXT: movq 80(%rdi), %r14 +; 
AVX2-NEXT: movq 88(%rdi), %rbx +; AVX2-NEXT: movq 96(%rdi), %rdx +; AVX2-NEXT: movq 104(%rdi), %r8 +; AVX2-NEXT: movq 112(%rdi), %rsi +; AVX2-NEXT: movq 120(%rdi), %r15 +; AVX2-NEXT: lzcntq %r15, %rax +; AVX2-NEXT: lzcntq %rsi, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: testq %r15, %r15 +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: lzcntq %r8, %r12 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: movq %rsi, %r12 +; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: orq %r15, %r12 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %rbx, %rcx +; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: lzcntq %r14, %r13 +; AVX2-NEXT: addl $64, %r13d +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %ecx, %r13d +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r10, %rcx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: lzcntq %r11, %r12 +; AVX2-NEXT: addl $64, %r12d +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %ecx, %r12d +; AVX2-NEXT: subl $-128, %r12d +; AVX2-NEXT: movq %r14, %rcx +; AVX2-NEXT: orq %rbx, %rcx +; AVX2-NEXT: cmovnel %r13d, %r12d +; AVX2-NEXT: addl $256, %r12d # imm = 0x100 +; AVX2-NEXT: movq %r8, %rcx +; AVX2-NEXT: orq %r15, %rcx +; AVX2-NEXT: orq %rsi, %rdx +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: cmovnel %eax, %r12d +; AVX2-NEXT: movq %rbp, %r14 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %rbp, %rcx +; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %r9, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rbp, %rbp +; AVX2-NEXT: cmovnel %ecx, %eax 
+; AVX2-NEXT: movq 32(%rdi), %r13 +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: lzcntq %r13, %rbp +; AVX2-NEXT: addl $64, %ebp +; AVX2-NEXT: movq 40(%rdi), %r8 +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: lzcntq %r8, %rdx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %edx, %ebp +; AVX2-NEXT: subl $-128, %ebp +; AVX2-NEXT: movq %r9, %rdx +; AVX2-NEXT: orq %r14, %rdx +; AVX2-NEXT: cmovnel %eax, %ebp +; AVX2-NEXT: movq 16(%rdi), %r9 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r9, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: movq 24(%rdi), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: movq 8(%rdi), %rsi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: lzcntq %rsi, %rdi +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %edi, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rdx, %r9 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq %r14, %r8 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r8, %r13 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: orq %r15, %rbx +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; AVX2-NEXT: orq %rbx, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX2-NEXT: orq %rcx, %r11 +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r10, %r11 +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: 
pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 32(%rdi), %r14 +; AVX512-NEXT: movq 48(%rdi), %rbp +; AVX512-NEXT: movq 64(%rdi), %r11 +; AVX512-NEXT: movq 72(%rdi), %r10 +; AVX512-NEXT: movq 80(%rdi), %rdx +; AVX512-NEXT: movq 88(%rdi), %rbx +; AVX512-NEXT: movq 96(%rdi), %rsi +; AVX512-NEXT: movq 104(%rdi), %r9 +; AVX512-NEXT: movq 112(%rdi), %r8 +; AVX512-NEXT: movq 120(%rdi), %r15 +; AVX512-NEXT: lzcntq %r15, %rax +; AVX512-NEXT: lzcntq %r8, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: testq %r15, %r15 +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: lzcntq %r9, %r12 +; AVX512-NEXT: lzcntq %rsi, %rax +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: movq %r8, %r12 +; AVX512-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: orq %r15, %r12 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: lzcntq %rbx, %rcx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: lzcntq %rdx, %r13 +; AVX512-NEXT: addl $64, %r13d +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %ecx, %r13d +; AVX512-NEXT: lzcntq %r10, %rcx +; AVX512-NEXT: lzcntq %r11, %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %ecx, %r12d +; AVX512-NEXT: subl $-128, %r12d +; AVX512-NEXT: movq %rdx, %rcx +; AVX512-NEXT: orq %rbx, %rcx +; AVX512-NEXT: cmovnel %r13d, %r12d +; AVX512-NEXT: addl $256, %r12d # imm = 0x100 +; AVX512-NEXT: movq %r9, %rcx +; AVX512-NEXT: orq %r15, %rcx +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rcx, %rsi +; AVX512-NEXT: movq 56(%rdi), %r13 +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: lzcntq %r13, %rcx +; AVX512-NEXT: movq %rbp, %rsi +; 
AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: lzcntq %rbp, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r13, %r13 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: lzcntq %r14, %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: movq 40(%rdi), %r8 +; AVX512-NEXT: lzcntq %r8, %rdx +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %edx, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %rsi, %rdx +; AVX512-NEXT: orq %r13, %rdx +; AVX512-NEXT: cmovnel %eax, %ebp +; AVX512-NEXT: movq 16(%rdi), %r9 +; AVX512-NEXT: lzcntq %r9, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: movq 24(%rdi), %rdx +; AVX512-NEXT: lzcntq %rdx, %rax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: movq 8(%rdi), %rsi +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: lzcntq %rsi, %rdi +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rdx, %r9 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq %r13, %r8 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r8, %r14 +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: orq %r15, %rbx +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; AVX512-NEXT: orq %rbx, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX512-NEXT: orq %rcx, %r11 +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r10, %r11 +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; 
AVX512-NEXT: retq + %a0 = load i1024, ptr %p0 + %cnt = call i1024 @llvm.ctlz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +; +; CTTZ +; + +define i32 @test_cttz_i128(i128 %a0) nounwind { +; SSE-LABEL: test_cttz_i128: +; SSE: # %bb.0: +; SSE-NEXT: rep bsfq %rdi, %rcx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq %rsi, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: tzcntq %rdi, %rcx +; AVX2-NEXT: tzcntq %rsi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: tzcntq %rdi, %rcx +; AVX512-NEXT: tzcntq %rsi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i128 @llvm.cttz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i128(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i128: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: rep bsfq %rcx, %rdx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 8(%rdi), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: tzcntq %rcx, %rdx +; AVX2-NEXT: tzcntq 8(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movq (%rdi), 
%rcx +; AVX512-NEXT: tzcntq %rcx, %rdx +; AVX512-NEXT: tzcntq 8(%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i128, ptr %p0 + %cnt = call i128 @llvm.cttz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @test_cttz_i256(i256 %a0) nounwind { +; SSE-LABEL: test_cttz_i256: +; SSE: # %bb.0: +; SSE-NEXT: rep bsfq %rdi, %rax +; SSE-NEXT: rep bsfq %rsi, %r8 +; SSE-NEXT: addl $64, %r8d +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %eax, %r8d +; SSE-NEXT: rep bsfq %rdx, %r9 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rsi, %rdi +; SSE-NEXT: cmovnel %r8d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: tzcntq %rdi, %rax +; AVX2-NEXT: tzcntq %rsi, %r8 +; AVX2-NEXT: addl $64, %r8d +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %eax, %r8d +; AVX2-NEXT: tzcntq %rdx, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rsi, %rdi +; AVX2-NEXT: cmovnel %r8d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: tzcntq %rdi, %rax +; AVX512-NEXT: tzcntq %rsi, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: tzcntq %rdx, %r9 +; AVX512-NEXT: tzcntq %rcx, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rsi, %rdi +; 
AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i256 @llvm.cttz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i256(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i256: +; SSE: # %bb.0: +; SSE-NEXT: movq 16(%rdi), %rcx +; SSE-NEXT: movq (%rdi), %rdx +; SSE-NEXT: movq 8(%rdi), %rsi +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %rsi, %r8 +; SSE-NEXT: addl $64, %r8d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r8d +; SSE-NEXT: rep bsfq %rcx, %r9 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 24(%rdi), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rsi, %rdx +; SSE-NEXT: cmovnel %r8d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: movq 8(%rdi), %rdx +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: tzcntq %rdx, %rsi +; AVX2-NEXT: addl $64, %esi +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %esi +; AVX2-NEXT: movq 16(%rdi), %r8 +; AVX2-NEXT: tzcntq %r8, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 24(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: movq 16(%rdi), %rcx +; AVX512-NEXT: movq (%rdi), %rdx +; AVX512-NEXT: movq 8(%rdi), %rsi +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: tzcntq %rsi, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: tzcntq %rcx, %r9 +; AVX512-NEXT: tzcntq 24(%rdi), %rax +; AVX512-NEXT: addl $64, %eax 
+; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rsi, %rdx +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i256, ptr %p0 + %cnt = call i256 @llvm.cttz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @test_cttz_i512(i512 %a0) nounwind { +; SSE-LABEL: test_cttz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: rep bsfq %rdi, %rax +; SSE-NEXT: rep bsfq %rsi, %r11 +; SSE-NEXT: addl $64, %r11d +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %rcx, %r10 +; SSE-NEXT: addl $64, %r10d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r10d +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: subl $-128, %r10d +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: orq %rsi, %rax +; SSE-NEXT: cmovnel %r11d, %r10d +; SSE-NEXT: rep bsfq %r8, %rax +; SSE-NEXT: rep bsfq %r9, %r11 +; SSE-NEXT: addl $64, %r11d +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: rep bsfq %rbx, %r14 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rbx, %rbx +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %r8 +; SSE-NEXT: cmovnel %r11d, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %rcx, %rsi +; SSE-NEXT: orq %rdx, %rdi +; SSE-NEXT: orq %rsi, %rdi +; SSE-NEXT: cmovnel %r10d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: tzcntq %rdi, %rax +; AVX2-NEXT: tzcntq %rsi, %r11 +; AVX2-NEXT: addl $64, %r11d +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %eax, %r11d +; AVX2-NEXT: xorl 
%eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: tzcntq %rcx, %r10 +; AVX2-NEXT: addl $64, %r10d +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %r10d +; AVX2-NEXT: subl $-128, %r10d +; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: orq %rsi, %rax +; AVX2-NEXT: cmovnel %r11d, %r10d +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r8, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %r9, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: tzcntq %r11, %r14 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r11, %r11 +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r9, %r8 +; AVX2-NEXT: cmovnel %ebx, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %rcx, %rsi +; AVX2-NEXT: orq %rdx, %rdi +; AVX2-NEXT: orq %rsi, %rdi +; AVX2-NEXT: cmovnel %r10d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: tzcntq %rdi, %rax +; AVX512-NEXT: tzcntq %rsi, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: tzcntq %rcx, %r10 +; AVX512-NEXT: addl $64, %r10d +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %r10d +; AVX512-NEXT: subl $-128, %r10d +; AVX512-NEXT: movq %rdi, %rax +; AVX512-NEXT: orq %rsi, %rax +; AVX512-NEXT: cmovnel %ebx, %r10d +; AVX512-NEXT: tzcntq %r8, %rax +; AVX512-NEXT: tzcntq %r9, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; 
AVX512-NEXT: tzcntq %r11, %r14 +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r9, %r8 +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %rcx, %rsi +; AVX512-NEXT: orq %rdx, %rdi +; AVX512-NEXT: orq %rsi, %rdi +; AVX512-NEXT: cmovnel %r10d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %cnt = call i512 @llvm.cttz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i512(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 48(%rdi), %r10 +; SSE-NEXT: movq 40(%rdi), %r9 +; SSE-NEXT: movq 24(%rdi), %r8 +; SSE-NEXT: movq 16(%rdi), %rdx +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: movq 8(%rdi), %rsi +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: rep bsfq %rsi, %rbx +; SSE-NEXT: addl $64, %ebx +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %eax, %ebx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %r8, %r11 +; SSE-NEXT: addl $64, %r11d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: movq 32(%rdi), %r14 +; SSE-NEXT: subl $-128, %r11d +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: orq %rsi, %rax +; SSE-NEXT: cmovnel %ebx, %r11d +; SSE-NEXT: rep bsfq %r14, %rax +; SSE-NEXT: rep bsfq %r9, %rbx +; SSE-NEXT: addl $64, %ebx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %eax, %ebx +; SSE-NEXT: rep bsfq %r10, %r15 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 56(%rdi), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %r14 +; SSE-NEXT: cmovnel %ebx, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r8, %rsi +; SSE-NEXT: orq %rdx, %rcx +; 
SSE-NEXT: orq %rsi, %rcx +; SSE-NEXT: cmovnel %r11d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 48(%rdi), %r10 +; AVX2-NEXT: movq 40(%rdi), %r9 +; AVX2-NEXT: movq 24(%rdi), %r8 +; AVX2-NEXT: movq 16(%rdi), %rdx +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: movq 8(%rdi), %rsi +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %rsi, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: tzcntq %r8, %r11 +; AVX2-NEXT: addl $64, %r11d +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %r11d +; AVX2-NEXT: subl $-128, %r11d +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: orq %rsi, %rax +; AVX2-NEXT: cmovnel %ebx, %r11d +; AVX2-NEXT: movq 32(%rdi), %rbx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rbx, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: tzcntq %r9, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r10, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 56(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r9, %rbx +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r8, %rsi +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: orq %rsi, %rcx +; AVX2-NEXT: cmovnel %r11d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; 
AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 48(%rdi), %r11 +; AVX512-NEXT: movq 40(%rdi), %r9 +; AVX512-NEXT: movq 32(%rdi), %r10 +; AVX512-NEXT: movq 24(%rdi), %r8 +; AVX512-NEXT: movq 16(%rdi), %rdx +; AVX512-NEXT: movq (%rdi), %rcx +; AVX512-NEXT: movq 8(%rdi), %rsi +; AVX512-NEXT: tzcntq %rcx, %rax +; AVX512-NEXT: tzcntq %rsi, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: tzcntq %r8, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: subl $-128, %ebx +; AVX512-NEXT: movq %rcx, %rax +; AVX512-NEXT: orq %rsi, %rax +; AVX512-NEXT: cmovnel %r14d, %ebx +; AVX512-NEXT: tzcntq %r10, %rax +; AVX512-NEXT: tzcntq %r9, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: tzcntq 56(%rdi), %rax +; AVX512-NEXT: tzcntq %r11, %rdi +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r9, %r10 +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rdx, %rcx +; AVX512-NEXT: orq %rsi, %rcx +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %a0 = load i512, ptr %p0 + %cnt = call i512 @llvm.cttz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @test_cttz_i1024(i1024 %a0) nounwind { +; SSE-LABEL: test_cttz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq %r9, %r13 +; SSE-NEXT: movq %r8, %r14 +; SSE-NEXT: movq %rcx, %rbx +; SSE-NEXT: movq %rdx, %r10 +; SSE-NEXT: movq %rsi, %r9 +; 
SSE-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: rep bsfq %rdi, %rax +; SSE-NEXT: rep bsfq %r9, %r15 +; SSE-NEXT: addl $64, %r15d +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %eax, %r15d +; SSE-NEXT: rep bsfq %r10, %r12 +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %r12d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %rdi, %r12 +; SSE-NEXT: orq %r9, %r12 +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: rep bsfq %r8, %r15 +; SSE-NEXT: movq %r13, %rcx +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: rep bsfq %r13, %r13 +; SSE-NEXT: addl $64, %r13d +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %r15d, %r13d +; SSE-NEXT: rep bsfq %rdx, %r12 +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %r15 +; SSE-NEXT: addl $64, %r15d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %r12d, %r15d +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; SSE-NEXT: subl $-128, %r15d +; SSE-NEXT: movq %r8, %rbp +; SSE-NEXT: orq %rcx, %rbp +; SSE-NEXT: cmovnel %r13d, %r15d +; SSE-NEXT: addl $256, %r15d # imm = 0x100 +; SSE-NEXT: movq %r9, %r13 +; SSE-NEXT: orq %rbx, %r13 +; SSE-NEXT: movq %rdi, %rbp +; SSE-NEXT: orq %r10, %rbp +; SSE-NEXT: orq %r13, %rbp +; SSE-NEXT: cmovnel %eax, %r15d +; SSE-NEXT: rep bsfq %r11, %r13 +; SSE-NEXT: rep bsfq %r12, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %r13d, %eax +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %r13 +; SSE-NEXT: addl $64, %r13d +; SSE-NEXT: rep bsfq %rsi, %rcx +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %ecx, %r13d +; SSE-NEXT: subl $-128, %r13d +; SSE-NEXT: movq %r11, %rcx +; SSE-NEXT: orq %r12, %rcx +; SSE-NEXT: cmovnel %eax, %r13d +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; SSE-NEXT: rep bsfq %rbp, %rcx +; SSE-NEXT: addl $64, %ecx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: testq 
%rdx, %rdx +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: rep bsfq %r8, %rsi +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %esi, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rbp, %rdx +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r12 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r12, %r11 +; SSE-NEXT: cmovnel %r13d, %eax +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; SSE-NEXT: orq %rbx, %r9 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: orq %r14, %rdi +; SSE-NEXT: orq %r10, %rdi +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r9, %rdi +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq %r9, %rbx +; AVX2-NEXT: movq %r8, %r14 +; AVX2-NEXT: movq %rcx, %r11 +; AVX2-NEXT: movq %rdx, %r10 +; AVX2-NEXT: movq %rsi, %r9 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX2-NEXT: tzcntq %rdi, %rax +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r9, %r15 +; AVX2-NEXT: addl $64, %r15d +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %eax, %r15d +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %r10, %r12 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r11, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r10, %r10 +; 
AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: movq %rdi, %r12 +; AVX2-NEXT: orq %r9, %r12 +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r14, %r15 +; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %rbx, %r12 +; AVX2-NEXT: addl $64, %r12d +; AVX2-NEXT: testq %r14, %r14 +; AVX2-NEXT: cmovnel %r15d, %r12d +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %rcx, %r13 +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %rdx, %r15 +; AVX2-NEXT: addl $64, %r15d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %r13d, %r15d +; AVX2-NEXT: subl $-128, %r15d +; AVX2-NEXT: movq %r14, %r13 +; AVX2-NEXT: orq %rbx, %r13 +; AVX2-NEXT: cmovnel %r12d, %r15d +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: addl $256, %r15d # imm = 0x100 +; AVX2-NEXT: movq %r9, %r13 +; AVX2-NEXT: orq %r11, %r13 +; AVX2-NEXT: movq %rdi, %rbp +; AVX2-NEXT: orq %r10, %rbp +; AVX2-NEXT: orq %r13, %rbp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: cmovnel %eax, %r15d +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: tzcntq %r12, %rbp +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r13, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r12, %r12 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: tzcntq %r8, %rbp +; AVX2-NEXT: addl $64, %ebp +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %rsi, %rcx +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %ecx, %ebp +; AVX2-NEXT: subl $-128, %ebp +; AVX2-NEXT: movq %r12, %rcx +; AVX2-NEXT: orq %r13, %rcx +; AVX2-NEXT: cmovnel %eax, %ebp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %rbx, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: xorl %eax, %eax +; 
AVX2-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX2-NEXT: tzcntq %r8, %rsi +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rbx, %rdx +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r13, %r12 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX2-NEXT: orq %r11, %r9 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: orq %r14, %rdi +; AVX2-NEXT: orq %r10, %rdi +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r9, %rdi +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq %r9, %r14 +; AVX512-NEXT: movq %r8, %r15 +; AVX512-NEXT: movq %rcx, %r11 +; AVX512-NEXT: movq %rdx, %r10 +; AVX512-NEXT: movq %rsi, %r9 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; AVX512-NEXT: tzcntq %rdi, %rax +; AVX512-NEXT: tzcntq %r9, %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: tzcntq %rdx, %r13 +; AVX512-NEXT: tzcntq %r11, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %r13d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: movq %rdi, %r13 +; AVX512-NEXT: orq %r9, %r13 +; 
AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: tzcntq %r8, %r12 +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: tzcntq %r14, %r13 +; AVX512-NEXT: addl $64, %r13d +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %r12d, %r13d +; AVX512-NEXT: tzcntq %rcx, %rbp +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %ebp, %r12d +; AVX512-NEXT: subl $-128, %r12d +; AVX512-NEXT: movq %r8, %rbp +; AVX512-NEXT: orq %r14, %rbp +; AVX512-NEXT: cmovnel %r13d, %r12d +; AVX512-NEXT: addl $256, %r12d # imm = 0x100 +; AVX512-NEXT: movq %r9, %r13 +; AVX512-NEXT: orq %r11, %r13 +; AVX512-NEXT: movq %rdi, %rbp +; AVX512-NEXT: orq %rdx, %rbp +; AVX512-NEXT: orq %r13, %rbp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: tzcntq %rbx, %rbp +; AVX512-NEXT: tzcntq %r13, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: tzcntq %rsi, %rcx +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %ecx, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %rbx, %rcx +; AVX512-NEXT: orq %r13, %rcx +; AVX512-NEXT: cmovnel %eax, %ebp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; AVX512-NEXT: tzcntq %r14, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX512-NEXT: tzcntq %r8, %rsi +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %esi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r14, %rdx +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %rbx 
+; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r13, %rbx +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX512-NEXT: orq %r11, %r9 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: orq %r15, %rdi +; AVX512-NEXT: orq %r10, %rdi +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r9, %rdi +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %cnt = call i1024 @llvm.cttz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i1024(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 88(%rdi), %r10 +; SSE-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 56(%rdi), %rcx +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 40(%rdi), %rsi +; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 24(%rdi), %r9 +; SSE-NEXT: movq 16(%rdi), %r15 +; SSE-NEXT: movq (%rdi), %r8 +; SSE-NEXT: movq 8(%rdi), %r11 +; SSE-NEXT: rep bsfq %r8, %rax +; SSE-NEXT: rep bsfq %r11, %rdx +; SSE-NEXT: addl $64, %edx +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %edx +; SSE-NEXT: rep bsfq %r15, %rbx +; SSE-NEXT: rep bsfq %r9, %rax +; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r15, %r15 +; SSE-NEXT: cmovnel %ebx, %eax +; SSE-NEXT: movq 32(%rdi), %rbx +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %r8, %r14 +; SSE-NEXT: orq %r11, %r14 +; SSE-NEXT: cmovnel %edx, %eax +; 
SSE-NEXT: rep bsfq %rbx, %rdx +; SSE-NEXT: rep bsfq %rsi, %r12 +; SSE-NEXT: addl $64, %r12d +; SSE-NEXT: testq %rbx, %rbx +; SSE-NEXT: cmovnel %edx, %r12d +; SSE-NEXT: movq 48(%rdi), %r13 +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: rep bsfq %r13, %rdx +; SSE-NEXT: rep bsfq %rcx, %r14 +; SSE-NEXT: addl $64, %r14d +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %edx, %r14d +; SSE-NEXT: subl $-128, %r14d +; SSE-NEXT: movq %rbx, %rdx +; SSE-NEXT: orq %rsi, %rdx +; SSE-NEXT: cmovnel %r12d, %r14d +; SSE-NEXT: movq 72(%rdi), %r12 +; SSE-NEXT: addl $256, %r14d # imm = 0x100 +; SSE-NEXT: movq %r11, %rdx +; SSE-NEXT: orq %r9, %rdx +; SSE-NEXT: movq %r8, %r13 +; SSE-NEXT: orq %r15, %r13 +; SSE-NEXT: orq %rdx, %r13 +; SSE-NEXT: movq 64(%rdi), %r13 +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: rep bsfq %r13, %rdx +; SSE-NEXT: rep bsfq %r12, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: rep bsfq %r10, %rbp +; SSE-NEXT: addl $64, %ebp +; SSE-NEXT: movq 80(%rdi), %r10 +; SSE-NEXT: rep bsfq %r10, %rcx +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %ecx, %ebp +; SSE-NEXT: subl $-128, %ebp +; SSE-NEXT: movq %r13, %rcx +; SSE-NEXT: orq %r12, %rcx +; SSE-NEXT: cmovnel %eax, %ebp +; SSE-NEXT: movq 104(%rdi), %r9 +; SSE-NEXT: rep bsfq %r9, %rcx +; SSE-NEXT: addl $64, %ecx +; SSE-NEXT: movq 96(%rdi), %rdx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 120(%rdi), %rax +; SSE-NEXT: movq 112(%rdi), %rdi +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: rep bsfq %rdi, %rsi +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %esi, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %rdx +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; SSE-NEXT: orq %r10, %r13 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r12, %r13 +; SSE-NEXT: 
cmovnel %ebp, %eax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; SSE-NEXT: orq %rcx, %r11 +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; SSE-NEXT: orq %rbx, %r8 +; SSE-NEXT: orq %r15, %r8 +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r11, %r8 +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 72(%rdi), %r14 +; AVX2-NEXT: movq 64(%rdi), %r15 +; AVX2-NEXT: movq 56(%rdi), %r9 +; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 48(%rdi), %rcx +; AVX2-NEXT: movq 40(%rdi), %r10 +; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 32(%rdi), %rsi +; AVX2-NEXT: movq 24(%rdi), %rbp +; AVX2-NEXT: movq 16(%rdi), %rbx +; AVX2-NEXT: movq (%rdi), %r8 +; AVX2-NEXT: movq 8(%rdi), %r11 +; AVX2-NEXT: tzcntq %r8, %rax +; AVX2-NEXT: tzcntq %r11, %rdx +; AVX2-NEXT: addl $64, %edx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %eax, %edx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %rbx, %r12 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rbp, %rax +; AVX2-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: movq %r8, %r12 +; AVX2-NEXT: orq %r11, %r12 +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: tzcntq %rsi, %rdx +; AVX2-NEXT: 
xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %r10, %r13 +; AVX2-NEXT: addl $64, %r13d +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: cmovnel %edx, %r13d +; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: tzcntq %rcx, %rdx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %r9, %r12 +; AVX2-NEXT: addl $64, %r12d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %edx, %r12d +; AVX2-NEXT: subl $-128, %r12d +; AVX2-NEXT: movq %rsi, %rdx +; AVX2-NEXT: orq %r10, %rdx +; AVX2-NEXT: cmovnel %r13d, %r12d +; AVX2-NEXT: addl $256, %r12d # imm = 0x100 +; AVX2-NEXT: movq %r11, %rdx +; AVX2-NEXT: orq %rbp, %rdx +; AVX2-NEXT: movq %r8, %r13 +; AVX2-NEXT: orq %rbx, %r13 +; AVX2-NEXT: orq %rdx, %r13 +; AVX2-NEXT: cmovnel %eax, %r12d +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: tzcntq %r15, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r14, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r15, %r15 +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: movq 88(%rdi), %rbp +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %rbp, %r13 +; AVX2-NEXT: addl $64, %r13d +; AVX2-NEXT: movq 80(%rdi), %r10 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %r10, %rcx +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %ecx, %r13d +; AVX2-NEXT: subl $-128, %r13d +; AVX2-NEXT: movq %r15, %rcx +; AVX2-NEXT: orq %r14, %rcx +; AVX2-NEXT: cmovnel %eax, %r13d +; AVX2-NEXT: movq 104(%rdi), %r9 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %r9, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: movq 96(%rdi), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: movq 112(%rdi), %rsi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 120(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: tzcntq %rsi, %rdi +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %edi, %eax +; 
AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r9, %rdx +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq %rbp, %r14 +; AVX2-NEXT: orq %r10, %r15 +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r14, %r15 +; AVX2-NEXT: cmovnel %r13d, %eax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX2-NEXT: orq %rcx, %r11 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX2-NEXT: orq %rbx, %r8 +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r11, %r8 +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 88(%rdi), %rbp +; AVX512-NEXT: movq 72(%rdi), %r15 +; AVX512-NEXT: movq 56(%rdi), %r9 +; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 48(%rdi), %rcx +; AVX512-NEXT: movq 40(%rdi), %r10 +; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 32(%rdi), %rsi +; AVX512-NEXT: movq 24(%rdi), %r14 +; AVX512-NEXT: movq 16(%rdi), %rbx +; AVX512-NEXT: movq (%rdi), %r8 +; AVX512-NEXT: movq 8(%rdi), %r11 +; AVX512-NEXT: tzcntq %r8, %rax +; AVX512-NEXT: tzcntq %r11, %rdx +; AVX512-NEXT: addl $64, %edx +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %eax, %edx +; AVX512-NEXT: tzcntq %rbx, %r12 +; AVX512-NEXT: tzcntq %r14, %rax +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; 
AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: movq %r8, %r12 +; AVX512-NEXT: orq %r11, %r12 +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: tzcntq %rsi, %rdx +; AVX512-NEXT: tzcntq %r10, %r13 +; AVX512-NEXT: addl $64, %r13d +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: cmovnel %edx, %r13d +; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: tzcntq %rcx, %rdx +; AVX512-NEXT: tzcntq %r9, %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %edx, %r12d +; AVX512-NEXT: subl $-128, %r12d +; AVX512-NEXT: movq %rsi, %rdx +; AVX512-NEXT: orq %r10, %rdx +; AVX512-NEXT: cmovnel %r13d, %r12d +; AVX512-NEXT: addl $256, %r12d # imm = 0x100 +; AVX512-NEXT: movq %r11, %rdx +; AVX512-NEXT: orq %r14, %rdx +; AVX512-NEXT: movq %r8, %r13 +; AVX512-NEXT: orq %rbx, %r13 +; AVX512-NEXT: orq %rdx, %r13 +; AVX512-NEXT: movq 64(%rdi), %r13 +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: tzcntq %r13, %rdx +; AVX512-NEXT: tzcntq %r15, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r13, %r13 +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: movq %rbp, %r14 +; AVX512-NEXT: tzcntq %rbp, %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: movq 80(%rdi), %r10 +; AVX512-NEXT: tzcntq %r10, %rcx +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %ecx, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %r13, %rcx +; AVX512-NEXT: orq %r15, %rcx +; AVX512-NEXT: cmovnel %eax, %ebp +; AVX512-NEXT: movq 104(%rdi), %r9 +; AVX512-NEXT: tzcntq %r9, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: movq 96(%rdi), %rdx +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: movq 112(%rdi), %rsi +; AVX512-NEXT: tzcntq 120(%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; 
AVX512-NEXT: tzcntq %rsi, %rdi +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r9, %rdx +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq %r14, %r15 +; AVX512-NEXT: orq %r10, %r13 +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r15, %r13 +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX512-NEXT: orq %rcx, %r11 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX512-NEXT: orq %rbx, %r8 +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r11, %r8 +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %a0 = load i1024, ptr %p0 + %cnt = call i1024 @llvm.cttz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} diff --git a/llvm/test/CodeGen/X86/dag-fmf-cse.ll b/llvm/test/CodeGen/X86/dag-fmf-cse.ll index 609ccdc..cdcc082 100644 --- a/llvm/test/CodeGen/X86/dag-fmf-cse.ll +++ b/llvm/test/CodeGen/X86/dag-fmf-cse.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=fma -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=fma | FileCheck %s ; If fast-math-flags are propagated correctly, the mul1 expression ; should be recognized as a factor in the last fsub, so we should diff --git a/llvm/test/CodeGen/X86/fabs.ll b/llvm/test/CodeGen/X86/fabs.ll index 82c82ac..4e6da83 100644 --- 
a/llvm/test/CodeGen/X86/fabs.ll +++ b/llvm/test/CodeGen/X86/fabs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse,-sse2,-sse3 | FileCheck %s --check-prefix=X87 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=X87UNSAFE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse,-sse2,-sse3 -enable-no-nans-fp-math | FileCheck %s --check-prefix=X87UNSAFE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 declare float @fabsf(float) diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll index 0fe107c..aae6cda 100644 --- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll +++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll @@ -22,25 +22,24 @@ declare <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat>, <4 x bfloat>) define float @test_fmaximumnum(float %x, float %y) nounwind { ; SSE2-LABEL: test_fmaximumnum: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: js .LBB0_2 -; SSE2-NEXT: # %bb.1: +; SSE2-NEXT: js .LBB0_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: jmp .LBB0_3 +; SSE2-NEXT: .LBB0_1: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: .LBB0_3: ; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: .LBB0_2: -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: cmpordss %xmm3, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm3, %xmm4 -; SSE2-NEXT: js .LBB0_4 -; SSE2-NEXT: # %bb.3: -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: .LBB0_4: -; SSE2-NEXT: maxss %xmm1, %xmm3 -; SSE2-NEXT: andnps %xmm3, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: maxss %xmm2, %xmm3 +; SSE2-NEXT: movaps %xmm3, %xmm0 +; 
SSE2-NEXT: cmpunordss %xmm3, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: andnps %xmm3, %xmm2 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum: @@ -56,7 +55,7 @@ define float @test_fmaximumnum(float %x, float %y) nounwind { ; AVX1-NEXT: vmovdqa %xmm0, %xmm1 ; AVX1-NEXT: .LBB0_3: ; AVX1-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX1-NEXT: vcmpordss %xmm1, %xmm1, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -70,7 +69,7 @@ define float @test_fmaximumnum(float %x, float %y) nounwind { ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq ; @@ -95,7 +94,7 @@ define float @test_fmaximumnum(float %x, float %y) nounwind { ; X86-NEXT: vmovdqa %xmm2, %xmm0 ; X86-NEXT: .LBB0_3: ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -371,26 +370,25 @@ define float @test_fmaximumnum_nsz(float %x, float %y) "no-signed-zeros-fp-math" ; SSE2-LABEL: test_fmaximumnum_nsz: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpordss %xmm0, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm3 -; SSE2-NEXT: andps %xmm0, %xmm3 -; SSE2-NEXT: maxss %xmm1, %xmm0 -; SSE2-NEXT: andnps %xmm0, %xmm2 -; SSE2-NEXT: orps %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm1 +; SSE2-NEXT: cmpunordss %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum_nsz: ; AVX1: # 
%bb.0: ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: test_fmaximumnum_nsz: ; AVX512: # %bb.0: ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX512-NEXT: vcmpordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vcmpunordss %xmm1, %xmm1, %k1 ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq @@ -404,9 +402,9 @@ define float @test_fmaximumnum_nsz(float %x, float %y) "no-signed-zeros-fp-math" ; X86: # %bb.0: ; X86-NEXT: pushl %eax ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm1 -; X86-NEXT: vmaxss {{[0-9]+}}(%esp), %xmm0, %xmm2 -; X86-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 +; X86-NEXT: vmaxss {{[0-9]+}}(%esp), %xmm0, %xmm1 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -421,23 +419,22 @@ define float @test_fmaximumnum_combine_cmps(float %x, float %y) nounwind { ; SSE2-NEXT: divss %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: js .LBB9_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: .LBB9_2: -; SSE2-NEXT: movaps %xmm3, %xmm2 -; SSE2-NEXT: cmpordss %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm4 -; SSE2-NEXT: andps %xmm3, %xmm4 -; SSE2-NEXT: js .LBB9_4 -; SSE2-NEXT: # %bb.3: +; SSE2-NEXT: js .LBB9_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: jmp .LBB9_3 +; SSE2-NEXT: .LBB9_1: +; SSE2-NEXT: movaps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: .LBB9_4: -; SSE2-NEXT: maxss %xmm1, %xmm3 +; SSE2-NEXT: .LBB9_3: +; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: maxss %xmm2, %xmm3 +; SSE2-NEXT: movaps %xmm3, %xmm0 +; SSE2-NEXT: cmpunordss %xmm3, %xmm0 +; 
SSE2-NEXT: movaps %xmm0, %xmm2 ; SSE2-NEXT: andnps %xmm3, %xmm2 -; SSE2-NEXT: orps %xmm4, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum_combine_cmps: @@ -454,7 +451,7 @@ define float @test_fmaximumnum_combine_cmps(float %x, float %y) nounwind { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: .LBB9_3: ; AVX1-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX1-NEXT: vcmpordss %xmm1, %xmm1, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -469,7 +466,7 @@ define float @test_fmaximumnum_combine_cmps(float %x, float %y) nounwind { ; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512F-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512F-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512F-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512F-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512F-NEXT: retq ; @@ -507,7 +504,7 @@ define float @test_fmaximumnum_combine_cmps(float %x, float %y) nounwind { ; X86-NEXT: vmovaps %xmm1, %xmm0 ; X86-NEXT: .LBB9_3: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -527,23 +524,23 @@ define float @test_fminimumnum(float %x, float %y) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: js .LBB10_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: .LBB10_2: -; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: cmpordss %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm4 -; SSE2-NEXT: andps %xmm3, %xmm4 -; SSE2-NEXT: js .LBB10_4 -; SSE2-NEXT: # %bb.3: +; SSE2-NEXT: js .LBB10_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: jmp .LBB10_3 +; 
SSE2-NEXT: .LBB10_1: +; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: .LBB10_4: -; SSE2-NEXT: minss %xmm0, %xmm3 +; SSE2-NEXT: .LBB10_3: +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: minss %xmm2, %xmm3 +; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: cmpunordss %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm2 ; SSE2-NEXT: andnps %xmm3, %xmm2 -; SSE2-NEXT: orps %xmm4, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm2, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fminimumnum: @@ -559,7 +556,7 @@ define float @test_fminimumnum(float %x, float %y) nounwind { ; AVX1-NEXT: vmovdqa %xmm1, %xmm0 ; AVX1-NEXT: .LBB10_3: ; AVX1-NEXT: vminss %xmm2, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; @@ -573,7 +570,7 @@ define float @test_fminimumnum(float %x, float %y) nounwind { ; AVX512-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vcmpordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vcmpunordss %xmm1, %xmm1, %k1 ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq @@ -599,7 +596,7 @@ define float @test_fminimumnum(float %x, float %y) nounwind { ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB10_3: ; X86-NEXT: vminss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -857,26 +854,25 @@ define float @test_fminimumnum_nsz(float %x, float %y) nounwind { ; SSE2-LABEL: test_fminimumnum_nsz: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpordss %xmm0, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm3 -; SSE2-NEXT: andps %xmm0, %xmm3 -; 
SSE2-NEXT: minss %xmm1, %xmm0 -; SSE2-NEXT: andnps %xmm0, %xmm2 -; SSE2-NEXT: orps %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm1 +; SSE2-NEXT: cmpunordss %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fminimumnum_nsz: ; AVX1: # %bb.0: ; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: test_fminimumnum_nsz: ; AVX512: # %bb.0: ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm1 -; AVX512-NEXT: vcmpordss %xmm0, %xmm0, %k1 +; AVX512-NEXT: vcmpunordss %xmm1, %xmm1, %k1 ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq @@ -890,9 +886,9 @@ define float @test_fminimumnum_nsz(float %x, float %y) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %eax ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm1 -; X86-NEXT: vminss {{[0-9]+}}(%esp), %xmm0, %xmm2 -; X86-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 +; X86-NEXT: vminss {{[0-9]+}}(%esp), %xmm0, %xmm1 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -907,23 +903,23 @@ define float @test_fminimumnum_combine_cmps(float %x, float %y) nounwind { ; SSE2-NEXT: divss %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: js .LBB19_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: .LBB19_2: -; SSE2-NEXT: movaps %xmm3, %xmm2 -; SSE2-NEXT: cmpordss %xmm3, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm4 -; SSE2-NEXT: andps %xmm3, %xmm4 -; SSE2-NEXT: js .LBB19_4 -; SSE2-NEXT: # %bb.3: +; SSE2-NEXT: js .LBB19_1 +; 
SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: jmp .LBB19_3 +; SSE2-NEXT: .LBB19_1: +; SSE2-NEXT: movaps %xmm0, %xmm2 ; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: .LBB19_4: -; SSE2-NEXT: minss %xmm0, %xmm3 +; SSE2-NEXT: .LBB19_3: +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: minss %xmm2, %xmm3 +; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: cmpunordss %xmm3, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm2 ; SSE2-NEXT: andnps %xmm3, %xmm2 -; SSE2-NEXT: orps %xmm4, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm2, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fminimumnum_combine_cmps: @@ -940,7 +936,7 @@ define float @test_fminimumnum_combine_cmps(float %x, float %y) nounwind { ; AVX1-NEXT: vmovaps %xmm2, %xmm0 ; AVX1-NEXT: .LBB19_3: ; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; @@ -955,7 +951,7 @@ define float @test_fminimumnum_combine_cmps(float %x, float %y) nounwind { ; AVX512F-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} ; AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512F-NEXT: vminss %xmm2, %xmm0, %xmm1 -; AVX512F-NEXT: vcmpordss %xmm0, %xmm0, %k1 +; AVX512F-NEXT: vcmpunordss %xmm1, %xmm1, %k1 ; AVX512F-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512F-NEXT: vmovaps %xmm1, %xmm0 ; AVX512F-NEXT: retq @@ -994,7 +990,7 @@ define float @test_fminimumnum_combine_cmps(float %x, float %y) nounwind { ; X86-NEXT: vmovaps %xmm2, %xmm0 ; X86-NEXT: .LBB19_3: ; X86-NEXT: vminss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -1022,9 +1018,9 @@ define <2 x double> @test_fminimumnum_vector(<2 x double> %x, <2 x double> %y) { ; SSE2-NEXT: por %xmm4, %xmm3 ; 
SSE2-NEXT: movdqa %xmm3, %xmm1 ; SSE2-NEXT: minpd %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: cmpordpd %xmm3, %xmm0 -; SSE2-NEXT: andpd %xmm0, %xmm3 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm3 ; SSE2-NEXT: andnpd %xmm1, %xmm0 ; SSE2-NEXT: orpd %xmm3, %xmm0 ; SSE2-NEXT: retq @@ -1034,7 +1030,7 @@ define <2 x double> @test_fminimumnum_vector(<2 x double> %x, <2 x double> %y) { ; AVX-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2 ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vminpd %xmm2, %xmm0, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vcmpunordpd %xmm1, %xmm1, %xmm2 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; @@ -1048,7 +1044,7 @@ define <2 x double> @test_fminimumnum_vector(<2 x double> %x, <2 x double> %y) { ; X86-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2 ; X86-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0 ; X86-NEXT: vminpd %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordpd %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y) @@ -1084,19 +1080,17 @@ define <2 x double> @test_fminimumnum_vector_zero(<2 x double> %x) { ; SSE2: # %bb.0: ; SSE2-NEXT: xorpd %xmm1, %xmm1 ; SSE2-NEXT: minpd %xmm0, %xmm1 -; SSE2-NEXT: movapd %xmm0, %xmm2 -; SSE2-NEXT: cmpordpd %xmm0, %xmm2 -; SSE2-NEXT: andpd %xmm2, %xmm0 -; SSE2-NEXT: andnpd %xmm1, %xmm2 -; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm0 +; SSE2-NEXT: andnpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_fminimumnum_vector_zero: ; AVX: # %bb.0: ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordpd %xmm0, 
%xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fminimumnum_vector_zero: @@ -1108,9 +1102,9 @@ define <2 x double> @test_fminimumnum_vector_zero(<2 x double> %x) { ; X86-LABEL: test_fminimumnum_vector_zero: ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X86-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1 +; X86-NEXT: vandnpd %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> <double 0., double 0.>) ret <2 x double> %r @@ -1120,20 +1114,21 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero(<4 x float> %x) { ; SSE2-LABEL: test_fmaximumnum_vector_signed_zero: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; SSE2-NEXT: maxps %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpordps %xmm0, %xmm2 -; SSE2-NEXT: andps %xmm2, %xmm0 -; SSE2-NEXT: andnps %xmm1, %xmm2 -; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: maxps %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_fmaximumnum_vector_signed_zero: ; AVX: # %bb.0: ; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fmaximumnum_vector_signed_zero: @@ -1144,9 +1139,9 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero(<4 x float> %x) { ; X86-LABEL: 
test_fmaximumnum_vector_signed_zero: ; X86: # %bb.0: ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; X86-NEXT: vmaxps %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vmaxps %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2 +; X86-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; X86-NEXT: retl %r = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> <float -0., float -0., float -0., float -0.>) ret <4 x float> %r @@ -1155,13 +1150,14 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero(<4 x float> %x) { define <2 x double> @test_fminimumnum_vector_partially_zero(<2 x double> %x) { ; SSE2-LABEL: test_fminimumnum_vector_partially_zero: ; SSE2: # %bb.0: -; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: cmpordpd %xmm0, %xmm1 -; SSE2-NEXT: xorpd %xmm2, %xmm2 -; SSE2-NEXT: movhpd {{.*#+}} xmm2 = xmm2[0],mem[0] +; SSE2-NEXT: xorpd %xmm1, %xmm1 +; SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; SSE2-NEXT: movapd %xmm1, %xmm2 ; SSE2-NEXT: minpd %xmm0, %xmm2 -; SSE2-NEXT: andpd %xmm1, %xmm0 -; SSE2-NEXT: andnpd %xmm2, %xmm1 +; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm2, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 ; SSE2-NEXT: orpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -1169,9 +1165,9 @@ define <2 x double> @test_fminimumnum_vector_partially_zero(<2 x double> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fminimumnum_vector_partially_zero: @@ -1185,9 +1181,9 @@ define <2 x double> @test_fminimumnum_vector_partially_zero(<2 x 
double> %x) { ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; X86-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; X86-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm2 +; X86-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> <double 0., double 5.>) ret <2 x double> %r @@ -1212,9 +1208,9 @@ define <2 x double> @test_fminimumnum_vector_different_zeros(<2 x double> %x) { ; SSE2-NEXT: por %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm3, %xmm1 ; SSE2-NEXT: minpd %xmm4, %xmm1 -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: cmpordpd %xmm3, %xmm0 -; SSE2-NEXT: andpd %xmm0, %xmm3 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm3 ; SSE2-NEXT: andnpd %xmm1, %xmm0 ; SSE2-NEXT: orpd %xmm3, %xmm0 ; SSE2-NEXT: retq @@ -1226,7 +1222,7 @@ define <2 x double> @test_fminimumnum_vector_different_zeros(<2 x double> %x) { ; AVX-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2 ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vminpd %xmm2, %xmm0, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vcmpunordpd %xmm1, %xmm1, %xmm2 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; @@ -1244,7 +1240,7 @@ define <2 x double> @test_fminimumnum_vector_different_zeros(<2 x double> %x) { ; X86-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2 ; X86-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0 ; X86-NEXT: vminpd %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordpd %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> <double 0., double -0.>) @@ -1278,20 +1274,24 @@ define <4 x float> @test_fmaximumnum_vector_non_zero(<4 x float> %x) { 
define <2 x double> @test_fminimumnum_vector_nan(<2 x double> %x) { ; SSE2-LABEL: test_fminimumnum_vector_nan: ; SSE2: # %bb.0: -; SSE2-NEXT: xorpd %xmm2, %xmm2 ; SSE2-NEXT: xorpd %xmm1, %xmm1 ; SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; SSE2-NEXT: minpd %xmm0, %xmm1 -; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] -; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: minpd %xmm0, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: cmpunordpd %xmm2, %xmm0 +; SSE2-NEXT: andpd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm2, %xmm0 +; SSE2-NEXT: orpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_fminimumnum_vector_nan: ; AVX: # %bb.0: ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovhpd {{.*#+}} xmm2 = xmm1[0],mem[0] -; AVX-NEXT: vminpd %xmm0, %xmm2, %xmm0 -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fminimumnum_vector_nan: @@ -1306,7 +1306,7 @@ define <2 x double> @test_fminimumnum_vector_nan(<2 x double> %x) { ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; X86-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; X86-NEXT: vminpd %xmm0, %xmm1, %xmm0 -; X86-NEXT: vcmpordpd %xmm1, %xmm1, %xmm2 +; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm2 ; X86-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> <double 0., double 0x7fff000000000000>) @@ -1318,19 +1318,17 @@ define <2 x double> @test_fminimumnum_vector_zero_first(<2 x double> %x) { ; SSE2: # %bb.0: ; SSE2-NEXT: xorpd %xmm1, %xmm1 ; SSE2-NEXT: minpd %xmm0, %xmm1 -; SSE2-NEXT: movapd %xmm0, %xmm2 -; SSE2-NEXT: cmpordpd %xmm0, %xmm2 -; SSE2-NEXT: andpd %xmm2, %xmm0 -; SSE2-NEXT: andnpd %xmm1, %xmm2 -; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: 
cmpunordpd %xmm1, %xmm0 +; SSE2-NEXT: andnpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_fminimumnum_vector_zero_first: ; AVX: # %bb.0: ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vandnpd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fminimumnum_vector_zero_first: @@ -1342,9 +1340,9 @@ define <2 x double> @test_fminimumnum_vector_zero_first(<2 x double> %x) { ; X86-LABEL: test_fminimumnum_vector_zero_first: ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X86-NEXT: vminpd %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordpd %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vminpd %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1 +; X86-NEXT: vandnpd %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> <double 0., double 0.>, <2 x double> %x) ret <2 x double> %r @@ -1378,20 +1376,21 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero_first(<4 x float> %x) { ; SSE2-LABEL: test_fmaximumnum_vector_signed_zero_first: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; SSE2-NEXT: maxps %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpordps %xmm0, %xmm2 -; SSE2-NEXT: andps %xmm2, %xmm0 -; SSE2-NEXT: andnps %xmm1, %xmm2 -; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: maxps %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordps %xmm2, %xmm0 +; SSE2-NEXT: andps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_fmaximumnum_vector_signed_zero_first: ; AVX: # %bb.0: ; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; AVX-NEXT: vmaxps %xmm0, %xmm1, 
%xmm1 -; AVX-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX10_2-LABEL: test_fmaximumnum_vector_signed_zero_first: @@ -1402,9 +1401,9 @@ define <4 x float> @test_fmaximumnum_vector_signed_zero_first(<4 x float> %x) { ; X86-LABEL: test_fmaximumnum_vector_signed_zero_first: ; X86: # %bb.0: ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; X86-NEXT: vmaxps %xmm0, %xmm1, %xmm1 -; X86-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 -; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; X86-NEXT: vmaxps %xmm0, %xmm1, %xmm0 +; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2 +; X86-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; X86-NEXT: retl %r = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> <float -0., float -0., float -0., float -0.>, <4 x float> %x) ret <4 x float> %r @@ -1455,11 +1454,11 @@ define <4 x float> @test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: maxps %xmm4, %xmm1 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: cmpordps %xmm0, %xmm2 -; SSE2-NEXT: andps %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: cmpunordps %xmm1, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: andnps %xmm1, %xmm2 -; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum_v4f32_splat: @@ -1468,7 +1467,7 @@ define <4 x float> @test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { ; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2 ; AVX1-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vmaxps %xmm2, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordps %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; @@ -1478,7 +1477,7 @@ define <4 x float> 
@test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { ; AVX512-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2 ; AVX512-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vmaxps %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 +; AVX512-NEXT: vcmpunordps %xmm1, %xmm1, %xmm2 ; AVX512-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq ; @@ -1494,7 +1493,7 @@ define <4 x float> @test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { ; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2 ; X86-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmaxps %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordps %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordps %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: retl %splatinsert = insertelement <4 x float> poison, float %y, i64 0 @@ -1506,134 +1505,130 @@ define <4 x float> @test_fmaximumnum_v4f32_splat(<4 x float> %x, float %y) { define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind { ; SSE2-LABEL: test_fmaximumnum_v4f16: ; SSE2: # %bb.0: -; SSE2-NEXT: subq $104, %rsp -; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: subq $136, %rsp +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] +; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[1,1] +; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: psrld $16, %xmm0 ; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; SSE2-NEXT: movaps %xmm1, %xmm0 ; 
SSE2-NEXT: psrld $16, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Folded Reload -; SSE2-NEXT: # xmm4 = mem[0],zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: js .LBB33_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movdqa %xmm4, %xmm2 -; SSE2-NEXT: .LBB33_2: -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: js .LBB33_4 -; SSE2-NEXT: # %bb.3: -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: .LBB33_4: -; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] -; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: maxss %xmm4, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm3, %xmm0 +; SSE2-NEXT: js .LBB33_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload +; SSE2-NEXT: jmp .LBB33_3 +; SSE2-NEXT: .LBB33_1: +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: .LBB33_3: +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: psrlq $48, %xmm0 +; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: psrlq $48, %xmm0 +; SSE2-NEXT: movdqa %xmm0, (%rsp) # 
16-byte Spill +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Folded Reload -; SSE2-NEXT: # xmm4 = mem[0],zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: js .LBB33_6 +; SSE2-NEXT: js .LBB33_4 ; SSE2-NEXT: # %bb.5: -; SSE2-NEXT: movdqa %xmm4, %xmm2 -; SSE2-NEXT: .LBB33_6: -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: js .LBB33_8 -; SSE2-NEXT: # %bb.7: -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: .LBB33_8: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload +; SSE2-NEXT: jmp .LBB33_6 +; SSE2-NEXT: .LBB33_4: ; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; SSE2-NEXT: psrlq $48, %xmm1 -; SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload -; SSE2-NEXT: psrlq $48, %xmm1 -; SSE2-NEXT: 
movdqa %xmm1, (%rsp) # 16-byte Spill -; SSE2-NEXT: maxss %xmm4, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: .LBB33_6: +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Folded Reload -; SSE2-NEXT: # xmm4 = mem[0],zero,zero,zero ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: js .LBB33_10 -; SSE2-NEXT: # %bb.9: -; SSE2-NEXT: movdqa %xmm4, %xmm2 -; SSE2-NEXT: .LBB33_10: -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: cmpordss %xmm2, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: js .LBB33_12 -; SSE2-NEXT: # %bb.11: -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: .LBB33_12: -; SSE2-NEXT: maxss %xmm4, %xmm2 +; SSE2-NEXT: js .LBB33_7 +; SSE2-NEXT: # %bb.8: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload +; SSE2-NEXT: jmp .LBB33_9 +; SSE2-NEXT: .LBB33_7: +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: .LBB33_9: +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; 
SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: andnps %xmm2, %xmm1 -; SSE2-NEXT: orps %xmm3, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd (%rsp), %xmm4 # 4-byte Folded Reload -; SSE2-NEXT: # xmm4 = mem[0],zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: js .LBB33_14 -; SSE2-NEXT: # %bb.13: -; SSE2-NEXT: movdqa %xmm4, %xmm2 -; SSE2-NEXT: .LBB33_14: -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: js .LBB33_16 -; SSE2-NEXT: # %bb.15: -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: .LBB33_16: -; SSE2-NEXT: maxss %xmm4, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm3, %xmm0 +; SSE2-NEXT: js .LBB33_10 +; SSE2-NEXT: # %bb.11: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa (%rsp), %xmm3 # 16-byte Reload +; SSE2-NEXT: jmp .LBB33_12 +; SSE2-NEXT: .LBB33_10: +; SSE2-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: .LBB33_12: +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: maxss %xmm1, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm1, %xmm0 ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] @@ -1641,7 +1636,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; SSE2-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; SSE2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: addq $104, %rsp +; SSE2-NEXT: addq $136, %rsp ; SSE2-NEXT: retq ; ; AVX1-LABEL: test_fmaximumnum_v4f16: @@ -1679,7 +1674,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 ; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX1-NEXT: vmaxss %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vcmpordss %xmm2, %xmm2, %xmm1 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1700,7 +1695,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX1-NEXT: vmovdqa %xmm0, %xmm2 ; AVX1-NEXT: .LBB33_6: ; AVX1-NEXT: vmaxss %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vcmpordss %xmm2, %xmm2, %xmm1 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1721,7 +1716,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX1-NEXT: vmovdqa %xmm0, %xmm2 ; AVX1-NEXT: .LBB33_9: ; AVX1-NEXT: vmaxss %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vcmpordss %xmm2, %xmm2, %xmm1 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill @@ -1742,7 +1737,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX1-NEXT: 
vmovdqa %xmm0, %xmm2 ; AVX1-NEXT: .LBB33_12: ; AVX1-NEXT: vmaxss %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vcmpordss %xmm2, %xmm2, %xmm1 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload @@ -1768,7 +1763,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1} ; AVX512-NEXT: vmaxss %xmm4, %xmm3, %xmm2 -; AVX512-NEXT: vcmpordss %xmm3, %xmm3, %k1 +; AVX512-NEXT: vcmpunordss %xmm2, %xmm2, %k1 ; AVX512-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm2 ; AVX512-NEXT: vshufps {{.*#+}} xmm3 = xmm0[3,3,3,3] @@ -1783,7 +1778,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vmaxss %xmm5, %xmm4, %xmm3 -; AVX512-NEXT: vcmpordss %xmm4, %xmm4, %k1 +; AVX512-NEXT: vcmpunordss %xmm3, %xmm3, %k1 ; AVX512-NEXT: vmovss %xmm4, %xmm3, %xmm3 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm3, %xmm3 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] @@ -1799,7 +1794,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vmaxss %xmm5, %xmm4, %xmm3 -; AVX512-NEXT: vcmpordss %xmm4, %xmm4, %k1 +; AVX512-NEXT: vcmpunordss %xmm3, %xmm3, %k1 ; AVX512-NEXT: vmovss %xmm4, %xmm3, %xmm3 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm3, %xmm3 ; AVX512-NEXT: vshufpd {{.*#+}} xmm4 = xmm0[1,0] @@ -1814,7 +1809,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm5, %xmm6, %xmm6 {%k1} ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: 
vmaxss %xmm6, %xmm5, %xmm4 -; AVX512-NEXT: vcmpordss %xmm5, %xmm5, %k1 +; AVX512-NEXT: vcmpunordss %xmm4, %xmm4, %k1 ; AVX512-NEXT: vmovss %xmm5, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm4, %xmm4 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] @@ -1831,7 +1826,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vmaxss %xmm5, %xmm4, %xmm3 -; AVX512-NEXT: vcmpordss %xmm4, %xmm4, %k1 +; AVX512-NEXT: vcmpunordss %xmm3, %xmm3, %k1 ; AVX512-NEXT: vmovss %xmm4, %xmm3, %xmm3 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm3, %xmm3 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] @@ -1846,7 +1841,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm5, %xmm6, %xmm6 {%k1} ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmaxss %xmm6, %xmm5, %xmm4 -; AVX512-NEXT: vcmpordss %xmm5, %xmm5, %k1 +; AVX512-NEXT: vcmpunordss %xmm4, %xmm4, %k1 ; AVX512-NEXT: vmovss %xmm5, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm4, %xmm4 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] @@ -1860,7 +1855,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm5, %xmm6, %xmm6 {%k1} ; AVX512-NEXT: vmovss %xmm4, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmaxss %xmm6, %xmm5, %xmm4 -; AVX512-NEXT: vcmpordss %xmm5, %xmm5, %k1 +; AVX512-NEXT: vcmpunordss %xmm4, %xmm4, %k1 ; AVX512-NEXT: vmovss %xmm5, %xmm4, %xmm4 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm4, %xmm4 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0 @@ -1875,7 +1870,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; AVX512-NEXT: vmovss %xmm1, %xmm5, %xmm5 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss 
%xmm5, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] @@ -1933,7 +1928,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB33_3: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __extendhfsf2 @@ -1955,7 +1950,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB33_6: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __truncsfhf2 @@ -1993,7 +1988,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB33_9: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __extendhfsf2 @@ -2015,7 +2010,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: .LBB33_12: ; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 
16-byte Spill ; X86-NEXT: calll __truncsfhf2 @@ -2041,120 +2036,114 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %r15 ; SSE2-NEXT: pushq %r14 +; SSE2-NEXT: pushq %r13 +; SSE2-NEXT: pushq %r12 ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: subq $56, %rsp -; SSE2-NEXT: pextrw $0, %xmm1, %r14d -; SSE2-NEXT: pextrw $0, %xmm0, %r15d -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: psrld $16, %xmm2 -; SSE2-NEXT: pextrw $0, %xmm2, %eax ; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: psrld $16, %xmm2 -; SSE2-NEXT: pextrw $0, %xmm2, %ecx +; SSE2-NEXT: psrlq $48, %xmm2 +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: psrlq $48, %xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[1,1] +; SSE2-NEXT: pextrw $0, %xmm4, %ebp +; SSE2-NEXT: movdqa %xmm1, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[1,1] +; SSE2-NEXT: pextrw $0, %xmm4, %r15d +; SSE2-NEXT: pextrw $0, %xmm0, %r12d +; SSE2-NEXT: pextrw $0, %xmm1, %r13d +; SSE2-NEXT: psrld $16, %xmm0 +; SSE2-NEXT: pextrw $0, %xmm0, %eax +; SSE2-NEXT: psrld $16, %xmm1 +; SSE2-NEXT: pextrw $0, %xmm1, %ecx ; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: movd %ecx, %xmm3 +; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: shll $16, %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: testl %ecx, %ecx -; SSE2-NEXT: movdqa %xmm3, %xmm7 -; SSE2-NEXT: js .LBB34_2 -; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movdqa %xmm2, %xmm7 -; SSE2-NEXT: .LBB34_2: -; SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[1,1] -; SSE2-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm0, %xmm6 -; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[1,1],xmm0[1,1] -; SSE2-NEXT: movdqa %xmm7, %xmm0 -; SSE2-NEXT: cmpordss %xmm7, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm7, %xmm4 -; SSE2-NEXT: js .LBB34_4 -; SSE2-NEXT: # %bb.3: -; SSE2-NEXT: movdqa 
%xmm3, %xmm2 -; SSE2-NEXT: .LBB34_4: -; SSE2-NEXT: pextrw $0, %xmm5, %ebp -; SSE2-NEXT: pextrw $0, %xmm6, %ebx -; SSE2-NEXT: maxss %xmm2, %xmm7 -; SSE2-NEXT: andnps %xmm7, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: movd %eax, %xmm4 +; SSE2-NEXT: js .LBB34_1 +; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: movdqa %xmm4, %xmm0 +; SSE2-NEXT: jmp .LBB34_3 +; SSE2-NEXT: .LBB34_1: +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm4, %xmm1 +; SSE2-NEXT: .LBB34_3: +; SSE2-NEXT: pextrw $0, %xmm2, %ebx +; SSE2-NEXT: pextrw $0, %xmm3, %r14d +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm3, %xmm0 ; SSE2-NEXT: callq __truncsfbf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: shll $16, %r15d -; SSE2-NEXT: movd %r15d, %xmm3 -; SSE2-NEXT: shll $16, %r14d -; SSE2-NEXT: movd %r14d, %xmm2 -; SSE2-NEXT: testl %r15d, %r15d -; SSE2-NEXT: movdqa %xmm3, %xmm1 -; SSE2-NEXT: js .LBB34_6 +; SSE2-NEXT: shll $16, %r13d +; SSE2-NEXT: movd %r13d, %xmm1 +; SSE2-NEXT: shll $16, %r12d +; SSE2-NEXT: movd %r12d, %xmm2 +; SSE2-NEXT: js .LBB34_4 ; SSE2-NEXT: # %bb.5: +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: jmp .LBB34_6 +; SSE2-NEXT: .LBB34_4: +; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm2, %xmm1 ; SSE2-NEXT: .LBB34_6: -; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Reload -; SSE2-NEXT: psrlq $48, %xmm5 -; SSE2-NEXT: movdqa (%rsp), %xmm6 # 16-byte Reload -; SSE2-NEXT: psrlq $48, %xmm6 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: cmpordss %xmm1, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm1, %xmm4 -; SSE2-NEXT: js .LBB34_8 -; SSE2-NEXT: # %bb.7: -; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: .LBB34_8: -; SSE2-NEXT: pextrw $0, %xmm5, %r15d -; SSE2-NEXT: pextrw $0, %xmm6, %r14d -; SSE2-NEXT: 
maxss %xmm2, %xmm1 -; SSE2-NEXT: andnps %xmm1, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm3, %xmm0 ; SSE2-NEXT: callq __truncsfbf2@PLT ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: shll $16, %ebx -; SSE2-NEXT: movd %ebx, %xmm1 +; SSE2-NEXT: shll $16, %r15d +; SSE2-NEXT: movd %r15d, %xmm1 ; SSE2-NEXT: shll $16, %ebp -; SSE2-NEXT: movd %ebp, %xmm3 -; SSE2-NEXT: testl %ebx, %ebx -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: js .LBB34_10 -; SSE2-NEXT: # %bb.9: -; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: .LBB34_10: +; SSE2-NEXT: movd %ebp, %xmm2 +; SSE2-NEXT: js .LBB34_7 +; SSE2-NEXT: # %bb.8: ; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm2, %xmm4 -; SSE2-NEXT: js .LBB34_12 -; SSE2-NEXT: # %bb.11: -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: .LBB34_12: -; SSE2-NEXT: maxss %xmm3, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: jmp .LBB34_9 +; SSE2-NEXT: .LBB34_7: +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: .LBB34_9: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm3, %xmm0 ; SSE2-NEXT: callq __truncsfbf2@PLT ; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; SSE2-NEXT: shll $16, %r14d ; SSE2-NEXT: movd %r14d, %xmm1 -; SSE2-NEXT: shll $16, %r15d -; SSE2-NEXT: movd %r15d, %xmm3 -; SSE2-NEXT: testl %r14d, %r14d -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: js .LBB34_14 -; SSE2-NEXT: # %bb.13: -; SSE2-NEXT: 
movdqa %xmm3, %xmm2 -; SSE2-NEXT: .LBB34_14: +; SSE2-NEXT: shll $16, %ebx +; SSE2-NEXT: movd %ebx, %xmm2 +; SSE2-NEXT: js .LBB34_10 +; SSE2-NEXT: # %bb.11: ; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: cmpordss %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: andps %xmm2, %xmm4 -; SSE2-NEXT: js .LBB34_16 -; SSE2-NEXT: # %bb.15: -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: .LBB34_16: -; SSE2-NEXT: maxss %xmm3, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: jmp .LBB34_12 +; SSE2-NEXT: .LBB34_10: +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: .LBB34_12: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: maxss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cmpunordss %xmm2, %xmm0 +; SSE2-NEXT: movaps %xmm0, %xmm3 +; SSE2-NEXT: andnps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm3, %xmm0 ; SSE2-NEXT: callq __truncsfbf2@PLT ; SSE2-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] @@ -2164,6 +2153,8 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: addq $56, %rsp ; SSE2-NEXT: popq %rbx +; SSE2-NEXT: popq %r12 +; SSE2-NEXT: popq %r13 ; SSE2-NEXT: popq %r14 ; SSE2-NEXT: popq %r15 ; SSE2-NEXT: popq %rbp @@ -2205,7 +2196,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX1-NEXT: vpextrw $0, %xmm2, %ebp ; AVX1-NEXT: vpextrw $0, %xmm3, %r15d ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: callq __truncsfbf2@PLT ; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -2222,7 +2213,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, 
<4 x bfloat> %y) n ; AVX1-NEXT: vmovdqa %xmm2, %xmm0 ; AVX1-NEXT: .LBB34_6: ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: callq __truncsfbf2@PLT ; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -2239,7 +2230,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX1-NEXT: vmovdqa %xmm2, %xmm0 ; AVX1-NEXT: .LBB34_9: ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: callq __truncsfbf2@PLT ; AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill @@ -2256,7 +2247,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX1-NEXT: vmovdqa %xmm2, %xmm0 ; AVX1-NEXT: .LBB34_12: ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: callq __truncsfbf2@PLT ; AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload @@ -2305,7 +2296,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: callq __truncsfbf2@PLT ; AVX512-NEXT: vpextrw $0, %xmm0, {{[0-9]+}}(%rsp) @@ -2319,7 +2310,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss 
%xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: callq __truncsfbf2@PLT ; AVX512-NEXT: vpextrw $0, %xmm0, (%rsp) @@ -2333,7 +2324,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: callq __truncsfbf2@PLT ; AVX512-NEXT: vpextrw $0, %xmm0, {{[0-9]+}}(%rsp) @@ -2347,7 +2338,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmaxss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vcmpordss %xmm1, %xmm1, %k1 +; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: callq __truncsfbf2@PLT ; AVX512-NEXT: vpextrw $0, %xmm0, {{[0-9]+}}(%rsp) @@ -2400,7 +2391,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; X86-NEXT: vpextrw $0, %xmm2, %edi ; X86-NEXT: vpextrw $0, %xmm3, %ebp ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: shll $16, %ecx @@ -2416,7 +2407,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; X86-NEXT: vmovdqa %xmm2, %xmm0 ; X86-NEXT: .LBB34_6: ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __truncsfbf2 @@ -2436,7 +2427,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x 
bfloat> %x, <4 x bfloat> %y) n ; X86-NEXT: vmovdqa %xmm2, %xmm0 ; X86-NEXT: .LBB34_9: ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __truncsfbf2 @@ -2456,7 +2447,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n ; X86-NEXT: vmovdqa %xmm2, %xmm0 ; X86-NEXT: .LBB34_12: ; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm1 -; X86-NEXT: vcmpordss %xmm0, %xmm0, %xmm2 +; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-NEXT: calll __truncsfbf2 diff --git a/llvm/test/CodeGen/X86/fp-undef.ll b/llvm/test/CodeGen/X86/fp-undef.ll index 227f007..c358085 100644 --- a/llvm/test/CodeGen/X86/fp-undef.ll +++ b/llvm/test/CodeGen/X86/fp-undef.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ANY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -enable-unsafe-fp-math | FileCheck %s --check-prefix=ANY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ANY ; This is duplicated from tests for InstSimplify. If you're ; adding something here, you should probably add it there too. 
diff --git a/llvm/test/CodeGen/X86/fsxor-alignment.ll b/llvm/test/CodeGen/X86/fsxor-alignment.ll index 6fa4a31..32af5b9 100644 --- a/llvm/test/CodeGen/X86/fsxor-alignment.ll +++ b/llvm/test/CodeGen/X86/fsxor-alignment.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s ; Don't fold the incoming stack arguments into the xorps instructions used ; to do floating-point negations, because the arguments aren't vectors diff --git a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll index f710a30..bd997d1 100644 --- a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll +++ b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse < %s | FileCheck %s ; The debug info in this test case was causing a crash because machine trace metrics ; did not correctly ignore debug instructions. 
The check lines ensure that the diff --git a/llvm/test/CodeGen/X86/neg_fp.ll b/llvm/test/CodeGen/X86/neg_fp.ll index 8020982..18ded50 100644 --- a/llvm/test/CodeGen/X86/neg_fp.ll +++ b/llvm/test/CodeGen/X86/neg_fp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-- -mattr=+sse4.1 | FileCheck %s -; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization +; Test that when we don't, we don't do the optimization ; -0 - (A - B) to (B - A) because A==B, -0 != 0 define float @negfp(float %a, float %b) nounwind { diff --git a/llvm/test/CodeGen/X86/negate-add-zero.ll b/llvm/test/CodeGen/X86/negate-add-zero.ll index eb4e2d3..4884832 100644 --- a/llvm/test/CodeGen/X86/negate-add-zero.ll +++ b/llvm/test/CodeGen/X86/negate-add-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s | FileCheck %s ; PR3374 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/llvm/test/CodeGen/X86/recip-pic.ll b/llvm/test/CodeGen/X86/recip-pic.ll index d01ecc1..d2620e7 100644 --- a/llvm/test/CodeGen/X86/recip-pic.ll +++ b/llvm/test/CodeGen/X86/recip-pic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -enable-unsafe-fp-math -mcpu=slm -relocation-model=pic | FileCheck %s --check-prefix=CHECK +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=slm -relocation-model=pic | FileCheck %s --check-prefix=CHECK define fastcc float @foo(float %x) unnamed_addr #0 { ; CHECK-LABEL: foo: diff --git a/llvm/test/CodeGen/X86/sincos-opt.ll b/llvm/test/CodeGen/X86/sincos-opt.ll index 6885456..51f3e52 100644 --- a/llvm/test/CodeGen/X86/sincos-opt.ll +++ b/llvm/test/CodeGen/X86/sincos-opt.ll @@ -1,10 +1,10 
@@ ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_SINCOS ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_NOOPT ; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS -; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH -; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH ; RUN: llc < %s -mtriple=x86_64-fuchsia -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS -; RUN: llc < %s -mtriple=x86_64-fuchsia -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH +; RUN: llc < %s -mtriple=x86_64-fuchsia -mcpu=core2 | FileCheck %s --check-prefix=GNU_SINCOS_FASTMATH ; RUN: llc < %s -mtriple=x86_64-scei-ps4 -mcpu=btver2 | FileCheck %s --check-prefix=PS4_SINCOS ; RUN: llc < %s -mtriple=x86_64-sie-ps5 -mcpu=znver2 | FileCheck %s --check-prefix=PS4_SINCOS diff --git a/llvm/test/CodeGen/X86/sincos.ll b/llvm/test/CodeGen/X86/sincos.ll index 7903407..9206c25 100644 --- a/llvm/test/CodeGen/X86/sincos.ll +++ b/llvm/test/CodeGen/X86/sincos.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Make sure this testcase codegens to the sin and cos instructions, not calls -; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 | FileCheck %s ; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 | FileCheck %s declare float @sinf(float) readonly diff --git 
a/llvm/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll b/llvm/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll index c0beb6f..2822d40 100644 --- a/llvm/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll +++ b/llvm/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CST --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+sse4.1 | FileCheck %s --check-prefix=CST --check-prefix=SSE41 -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx | FileCheck %s --check-prefix=CST --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F -; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64 | FileCheck %s --check-prefix=CST --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CST --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx | FileCheck %s --check-prefix=CST --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL ; Check that the constant used in the vectors are the right ones. 
; SSE2: [[MASKCSTADDR:.LCPI[0-9_]+]]: diff --git a/llvm/test/Instrumentation/AllocToken/intrinsic.ll b/llvm/test/Instrumentation/AllocToken/intrinsic.ll new file mode 100644 index 0000000..13aaa90 --- /dev/null +++ b/llvm/test/Instrumentation/AllocToken/intrinsic.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; Test that the alloc-token pass lowers the intrinsic to a constant token ID. +; +; RUN: opt < %s -passes=alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i64 @llvm.alloc.token.id.i64(metadata) + +define i64 @test_intrinsic_lowering() { +; CHECK-LABEL: define i64 @test_intrinsic_lowering() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i64 0 +; +entry: + %token_no_ptr = call i64 @llvm.alloc.token.id.i64(metadata !0) + ret i64 %token_no_ptr +} + +define i64 @test_intrinsic_lowering_ptr() { +; CHECK-LABEL: define i64 @test_intrinsic_lowering_ptr() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i64 1 +; +entry: + %token_with_ptr = call i64 @llvm.alloc.token.id.i64(metadata !1) + ret i64 %token_with_ptr +} + +!0 = !{!"NoPointerType", i1 false} +!1 = !{!"PointerType", i1 true} diff --git a/llvm/test/Instrumentation/AllocToken/intrinsic32.ll b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll new file mode 100644 index 0000000..eb5dbbe --- /dev/null +++ b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; Test that the alloc-token pass lowers the intrinsic to a constant token ID. 
+; +; RUN: opt < %s -passes=alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-pc-linux-gnu" + +declare i32 @llvm.alloc.token.id.i32(metadata) + +define i32 @test_intrinsic_lowering() { +; CHECK-LABEL: define i32 @test_intrinsic_lowering() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 0 +; +entry: + %token_no_ptr = call i32 @llvm.alloc.token.id.i32(metadata !0) + ret i32 %token_no_ptr +} + +define i32 @test_intrinsic_lowering_ptr() { +; CHECK-LABEL: define i32 @test_intrinsic_lowering_ptr() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 1 +; +entry: + %token_with_ptr = call i32 @llvm.alloc.token.id.i32(metadata !1) + ret i32 %token_with_ptr +} + +!0 = !{!"NoPointerType", i1 false} +!1 = !{!"PointerType", i1 true} diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll index 1ddcd4b..1c869bd 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s +; RUN: opt -S -passes=msan -mattr=+sme -o - %s ; XFAIL: * diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll index 9caa89d..00cf3204 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S 
-passes=msan -mattr=+sme -o - %s | FileCheck %s +; RUN: opt -S -passes=msan -mattr=+sme -o - %s ; XFAIL: * diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll new file mode 100644 index 0000000..3f43efa --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s + +; XFAIL: * + +; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll +; Manually reduced to show MSan leads to a compiler crash + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory { + %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %2 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %1, ptr %ptr) + ret void +} diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll new file mode 100644 index 0000000..cd04373 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll @@ -0,0 +1,340 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s + +; XFAIL: * + +; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> 
%zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm) sanitize_memory { + call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm) + ret void +} + +define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm) sanitize_memory { + call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm) + ret void +} + + +define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm) sanitize_memory { + call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm) + ret void +} + +define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm) sanitize_memory { + call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, 
<vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm) + ret void +} + + +define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) sanitize_memory { + call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) + ret void +} + + +define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) sanitize_memory { + call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) + ret void +} + + + +define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, + <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) sanitize_memory { + call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, + <vscale x 4 x i32> 
%zm2, <vscale x 4 x i32> %zm3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, + <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, + <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) + ret void +} + +define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, + <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) sanitize_memory { + call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, + <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, + <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, + <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) + ret void +} + +define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) sanitize_memory { + call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice,<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) + ret void +} + +define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) sanitize_memory { + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x 
i64> %zn0, <vscale x 2 x i64> %zn1) + ret void +} + +define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) sanitize_memory { + call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice, + <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice.7, + <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) + ret void +} + +define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) sanitize_memory { + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice, + <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice.7, + <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) + ret void +} + +define void @multi_vector_add_za_vg1x2_f64_tuple(i64 %stride, ptr %ptr) sanitize_memory { +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 0 + %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %4, 0 + %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %4, 1 + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> %2, <vscale x 2 x double> %5) + call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> %3, <vscale x 2 x double> %6) + ret void +} + + +define void 
@multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) sanitize_memory { + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice.7, + <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, + <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) + ret void +} + +define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) sanitize_memory { + call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice.7, + <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, + <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) + ret void +} + +define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) sanitize_memory { + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice, + <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, + <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice.7, + <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, + <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + ret void +} + +define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory { +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } 
@llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 0 + %3 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 1 + %4 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 2 + %5 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 0 + %8 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 1 + %9 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 2 + %10 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 0 + %13 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 1 + %14 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 2 + %15 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, 
<vscale x 4 x float> } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 0 + %18 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 1 + %19 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 2 + %20 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 3 + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %2, <vscale x 4 x float> %7, <vscale x 4 x float> %12, <vscale x 4 x float> %17) + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %3, <vscale x 4 x float> %8, <vscale x 4 x float> %13, <vscale x 4 x float> %18) + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %4, <vscale x 4 x float> %9, <vscale x 4 x float> %14, <vscale x 4 x float> %19) + call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %5, <vscale x 4 x float> %10, <vscale x 4 x float> %15, <vscale x 4 x float> %20) + ret void +} + +define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) sanitize_memory { + call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice, + <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, + <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice.7, + <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, + <vscale x 2 x double> 
%zn2, <vscale x 2 x double> %zn3) + ret void +} + + +define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) sanitize_memory { + %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } + @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, + <vscale x 16 x i8> %zm) + ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res +} + +define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) sanitize_memory { + %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } + @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, + <vscale x 8 x i16> %zm) + ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res +} + +define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) sanitize_memory { + %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } + @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, + <vscale x 4 x i32> %zm) + ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res +} + +define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) sanitize_memory { + %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } + @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, + <vscale x 2 x i64> %zm) + ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res +} + + +define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> 
%zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8>%zm) sanitize_memory { + %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } + @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, + <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, + <vscale x 16 x i8> %zm) + ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res +} + +define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_x4_single_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) sanitize_memory { + %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } + @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, + <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, + <vscale x 8 x i16> %zm) + ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res +} + +define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_x4_single_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) sanitize_memory { + %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } + @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, + <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, + <vscale x 4 x i32> %zm) + ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res +} + +define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_x4_single_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, 
<vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) sanitize_memory { + %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } + @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, + <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, + <vscale x 2 x i64> %zm) + ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res +} +declare void@llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32, <vscale x 4 x i32>,<vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32, <vscale x 2 x i64>,<vscale x 2 x i64>) +declare 
void@llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32, <vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare void@llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32, <vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>) +declare void@llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>) +declare void@llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>) +declare void@llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>) +declare void@llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>) +declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale 
x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll index 434ac84..3d759f7 100644 --- a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll @@ -865,33 +865,6 @@ entry: ret float %r } -; Note that the `unsafe-fp-math` from the function attributes should be moved to -; individual instructions, with the shadow instructions NOT getting the attribute. -define float @param_add_return_float_unsafe_fp_math(float %a) #0 { -; CHECK-LABEL: @param_add_return_float_unsafe_fp_math( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_float_unsafe_fp_math to i64) -; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1 -; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] -; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 -; CHECK-NEXT: [[B:%.*]] = fadd fast float [[A]], 1.000000e+00 -; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00 -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP5]], i32 1, i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[B]] to double -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], double [[TMP8]], double [[TMP5]] -; CHECK-NEXT: store i64 ptrtoint (ptr @param_add_return_float_unsafe_fp_math to i64), ptr @__nsan_shadow_ret_tag, 
align 8 -; CHECK-NEXT: store double [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 8 -; CHECK-NEXT: ret float [[B]] -; -entry: - %b = fadd float %a, 1.0 - ret float %b -} - - define void @truncate(<2 x double> %0) sanitize_numerical_stability { ; DQQ-LABEL: @truncate( ; DQQ-NEXT: entry: @@ -941,4 +914,4 @@ entry: } -attributes #0 = { nounwind readonly uwtable sanitize_numerical_stability "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #0 = { nounwind readonly uwtable sanitize_numerical_stability "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" } diff --git a/llvm/test/LTO/AArch64/Inputs/foo.ll b/llvm/test/LTO/AArch64/Inputs/foo.ll deleted file mode 100644 index 961b0d4..0000000 --- a/llvm/test/LTO/AArch64/Inputs/foo.ll +++ /dev/null @@ -1,16 +0,0 @@ -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-linux-gnu" - -define dso_local i32 @foo() #0 { -entry: - ret i32 42 -} - -attributes #0 = { noinline nounwind optnone uwtable } - -!llvm.module.flags = !{!0, !1, !2, !3} - 
-!0 = !{i32 8, !"branch-target-enforcement", i32 1} -!1 = !{i32 8, !"sign-return-address", i32 1} -!2 = !{i32 8, !"sign-return-address-all", i32 1} -!3 = !{i32 8, !"sign-return-address-with-bkey", i32 1} diff --git a/llvm/test/LTO/AArch64/TestInputs/bar.ll b/llvm/test/LTO/AArch64/TestInputs/bar.ll new file mode 100644 index 0000000..7c2a753 --- /dev/null +++ b/llvm/test/LTO/AArch64/TestInputs/bar.ll @@ -0,0 +1,35 @@ +;; This file contains the new semantic of the branch-target-enforcement, sign-return-address. +;; Used for test mixing a mixed link case and also verify the import too in llc. + +; RUN: llc -mattr=+pauth -mattr=+bti %s -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define dso_local void @bar() #0 { +entry: + ret void +} +; CHECK-LABEL: bar: +; CHECK-NOT: hint +; CHECK-NOT: bti +; CHECK: ret + +define dso_local void @baz() #1 { +entry: + ret void +} + +; CHECK-LABEL: baz: +; CHECK: bti c +; CHECK: ret + +attributes #0 = { noinline nounwind optnone uwtable } +attributes #1 = { noinline nounwind optnone uwtable "branch-target-enforcement" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 8, !"branch-target-enforcement", i32 2} +!1 = !{i32 8, !"sign-return-address", i32 2} +!2 = !{i32 8, !"sign-return-address-all", i32 2} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 2} diff --git a/llvm/test/LTO/AArch64/TestInputs/fiz.ll b/llvm/test/LTO/AArch64/TestInputs/fiz.ll new file mode 100644 index 0000000..e578426 --- /dev/null +++ b/llvm/test/LTO/AArch64/TestInputs/fiz.ll @@ -0,0 +1,41 @@ +;; This file contains the previous semantic of the branch-target-enforcement, sign-return-address. +;; Used for test mixing a mixed link case and also verify the import too in llc. 
+ +; RUN: llc -mattr=+pauth -mattr=+bti %s -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +declare void @func() + +define i32 @fiz_on() #0 { +entry: + call void @func() + ret i32 42 +} + +; CHECK-LABEL: fiz_on: +; CHECK: paciasp +; CHECK: bl func +; CHECK: retaa + +define i32 @fiz_off() #1 { +entry: + ret i32 43 +} + +; CHECK-LABEL: fiz_off: +; CHECK-NOT: pac +; CHECK-NOT: hint +; CHECK-NOT: bti +; CHECK: ret + +attributes #0 = { noinline nounwind optnone uwtable } +attributes #1 = { noinline nounwind optnone uwtable "branch-target-enforcement"="false" "sign-return-address"="none" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 8, !"branch-target-enforcement", i32 1} +!1 = !{i32 8, !"sign-return-address", i32 1} +!2 = !{i32 8, !"sign-return-address-all", i32 0} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 0} diff --git a/llvm/test/LTO/AArch64/TestInputs/foo.ll b/llvm/test/LTO/AArch64/TestInputs/foo.ll new file mode 100644 index 0000000..689d938 --- /dev/null +++ b/llvm/test/LTO/AArch64/TestInputs/foo.ll @@ -0,0 +1,38 @@ +;; This file contains the previous semantic of the branch-target-enforcement, sign-return-address. +;; Used for test mixing a mixed link case and also verify the import too in llc. 
+ +; RUN: llc -mattr=+pauth -mattr=+bti %s -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define i32 @foo_on() #0 { +entry: + ret i32 42 +} + +; CHECK-LABEL: foo_on: +; CHECK: pacibsp +; CHECK: mov +; CHECK: retab + +define i32 @foo_off() #1 { +entry: + ret i32 43 +} + +; CHECK-LABEL: foo_off: +; CHECK-NOT: pac +; CHECK-NOT: hint +; CHECK-NOT: bti +; CHECK: ret + +attributes #0 = { noinline nounwind optnone uwtable } +attributes #1 = { noinline nounwind optnone uwtable "branch-target-enforcement"="false" "sign-return-address"="none" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 8, !"branch-target-enforcement", i32 1} +!1 = !{i32 8, !"sign-return-address", i32 1} +!2 = !{i32 8, !"sign-return-address-all", i32 1} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 1} diff --git a/llvm/test/LTO/AArch64/TestInputs/old.ll b/llvm/test/LTO/AArch64/TestInputs/old.ll new file mode 100644 index 0000000..2b1758b --- /dev/null +++ b/llvm/test/LTO/AArch64/TestInputs/old.ll @@ -0,0 +1,59 @@ +;; This file contains the previous semantic of the branch-target-enforcement, sign-return-address. +;; Used for test mixing a mixed link case and also verify the import too in llc. 
+ +; RUN: llc -mattr=+pauth -mattr=+bti %s -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define i32 @old_bti() #0 { +entry: + ret i32 2 +} + +; CHECK-LABEL: old_bti: +; CHECK: bti c +; CHECK: mov +; CHECK: ret + +define i32 @old_pac() #1 { +entry: + ret i32 2 +} + +; CHECK-LABEL: old_pac: +; CHECK: paciasp +; CHECK: mov +; CHECK: retaa + + +define i32 @old_none() #2 { +entry: + ret i32 3 +} + +; CHECK-LABEL: old_none: +; CHECK-NOT: hint +; CHECK-NOT: paci +; CHECK-NOT: bti +; CHECK: ret + +declare i32 @func(i32) + +define i32 @old_none_leaf() #3 { +entry: + %0 = call i32 @func() + ret i32 %0 +} + +; CHECK-LABEL: old_none_leaf: +; CHECK: paciasp +; CHECK: bl func +; CHECK: retaa + +attributes #0 = { noinline nounwind optnone "branch-target-enforcement"="true" } +attributes #1 = { noinline nounwind optnone "branch-target-enforcement"="false" "sign-return-address"="all" "sign-return-address-key"="a_key" } +attributes #2 = { noinline nounwind optnone "branch-target-enforcement"="false" "sign-return-address"="none" } +attributes #3 = { noinline nounwind optnone "branch-target-enforcement"="false" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" } + +;; Intentionally no module flags diff --git a/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll b/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll index b3c9828..aef8907 100644 --- a/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll +++ b/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll @@ -1,10 +1,10 @@ -; Testcase to check that module with different branch-target-enforcement can -; be mixed. -; +;; Testcase to check that module with different branch-target-enforcement can +;; be mixed. 
+;; ; RUN: llvm-as %s -o %t1.bc -; RUN: llvm-as %p/Inputs/foo.ll -o %t2.bc +; RUN: llvm-as %p/TestInputs/foo.ll -o %t2.bc ; RUN: llvm-lto -exported-symbol main \ -; RUN: -exported-symbol foo \ +; RUN: -exported-symbol foo_on \ ; RUN: -filetype=obj \ ; RUN: %t1.bc %t2.bc \ ; RUN: -o %t1.exe 2>&1 | FileCheck --allow-empty %s @@ -14,11 +14,11 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -declare i32 @foo(); +declare i32 @foo_on(); define i32 @main() "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" { entry: - %add = call i32 @foo() + %add = call i32 @foo_on() ret i32 %add } @@ -30,9 +30,12 @@ entry: ; CHECK-NOT: linking module flags 'branch-target-enforcement': IDs have conflicting values in ; CHECK-DUMP: <main>: +; CHECK-DUMP: paciasp +; CHECK-DUMP: str ; CHECK-DUMP: bl 0x8 <main+0x8> -; CHECK-DUMP: <foo>: +; CHECK-DUMP: <foo_on>: +; CHECK-DUMP: pacibsp -; `main` doesn't support BTI while `foo` does, so in the binary -; we should see only PAC which is supported by both. +;; `main` doesn't support BTI while `foo` does, so in the binary +;; we should see only PAC which is supported by both. ; CHECK-PROP: Properties: aarch64 feature: PAC
\ No newline at end of file diff --git a/llvm/test/LTO/AArch64/link-sign-return-address.ll b/llvm/test/LTO/AArch64/link-sign-return-address.ll new file mode 100644 index 0000000..df6276f --- /dev/null +++ b/llvm/test/LTO/AArch64/link-sign-return-address.ll @@ -0,0 +1,127 @@ +;; Testcase to check that module with different sign return address can +;; be mixed. +; +; RUN: llvm-as %s -o %t1.bc +; RUN: llvm-as %p/TestInputs/foo.ll -o %t2.bc +; RUN: llvm-as %p/TestInputs/fiz.ll -o %t3.bc +; RUN: llvm-as %p/TestInputs/bar.ll -o %t4.bc +; RUN: llvm-as %p/TestInputs/old.ll -o %t5.bc +; RUN: llvm-lto -exported-symbol main \ +; RUN: -exported-symbol foo_on \ +; RUN: -exported-symbol foo_off \ +; RUN: -exported-symbol fiz_on \ +; RUN: -exported-symbol fiz_off \ +; RUN: -exported-symbol bar \ +; RUN: -exported-symbol baz \ +; RUN: -exported-symbol old_bti \ +; RUN: -exported-symbol old_pac \ +; RUN: -exported-symbol old_none \ +; RUN: -filetype=obj \ +; RUN: %t5.bc %t4.bc %t3.bc %t2.bc %t1.bc \ +; RUN: -o %t1.exe 2>&1 +; RUN: llvm-objdump -d %t1.exe | FileCheck --check-prefix=CHECK-DUMP %s +; RUN: llvm-readelf -n %t1.exe | FileCheck --allow-empty --check-prefix=CHECK-PROP %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +declare i32 @foo_on(); +declare i32 @foo_off(); +declare i32 @fiz_on(); +declare i32 @fiz_off(); +declare void @baz(); +declare void @bar(); +declare i32 @old_bti(); +declare i32 @old_pac(); +declare i32 @old_none(); + +define i32 @main() #0 { +entry: + call i32 @foo_on() + call i32 @foo_off() + call i32 @fiz_on() + call i32 @fiz_off() + call void @bar() + call void @baz() + call i32 @old_bti() + call i32 @old_pac() + call i32 @old_none() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone } + +!llvm.module.flags = !{!0, !1, !2, !3 } +!0 = !{i32 8, !"branch-target-enforcement", i32 0} +!1 = !{i32 8, !"sign-return-address", i32 0} +!2 = !{i32 8, !"sign-return-address-all", 
i32 0} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 0} + + +; CHECK-DUMP-LABEL: <old_bti>: +; CHECK-DUMP-NEXT: bti c +; CHECK-DUMP-NEXT: mov w0, #0x2 +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <old_pac>: +; CHECK-DUMP-NEXT: paciasp +; CHECK-DUMP-NEXT: mov w0, #0x2 +; CHECK-DUMP-NEXT: autiasp +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <old_none>: +; CHECK-DUMP-NEXT: mov w0, #0x3 +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <bar>: +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <baz>: +; CHECK-DUMP-NEXT: bti c +; CHECK-DUMP-NEXT: ret + +;; fiz.ll represents a module with the old style of the function attributes. +;; fiz_on shall have PAC with A-key as it requested at module level. +; CHECK-DUMP-LABEL: <fiz_on>: +; CHECK-DUMP-NEXT: paciasp +; CHECK-DUMP-NEXT: str x30, [sp, #-0x10]! +; CHECK-DUMP-NEXT: bl 0x38 <fiz_on+0x8> +; CHECK-DUMP-NEXT: mov w0, #0x2a +; CHECK-DUMP-NEXT: ldr x30, [sp], #0x10 +; CHECK-DUMP-NEXT: autiasp +; CHECK-DUMP-NEXT: ret + +;; fiz_off shall not have BTI or PAC instructions as they are disabled at function scope. +; CHECK-DUMP-LABEL: <fiz_off>: +; CHECK-DUMP-NEXT: mov w0, #0x2b +; CHECK-DUMP-NEXT: ret + +;; foo.ll represents a module with the old style of the function attributes. +;; foo_on shall have PAC with B-key as it requested at module level. +; CHECK-DUMP-LABEL: <foo_on>: +; CHECK-DUMP-NEXT: pacibsp +; CHECK-DUMP-NEXT: mov w0, #0x2a +; CHECK-DUMP-NEXT: autibsp +; CHECK-DUMP-NEXT: ret + +;; foo_off shall not have BTI or PAC instructions as they are disabled at function scope. 
+; CHECK-DUMP-LABEL: <foo_off>: +; CHECK-DUMP-NEXT: mov w0, #0x2b +; CHECK-DUMP-NEXT: ret + +; CHECK-DUMP-LABEL: <main>: +; CHECK-DUMP-NOT: paciasp +; CHECK-DUMP-NEXT: str x30, +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl +; CHECK-DUMP-NEXT: bl + +;; `main` doesn't support PAC sign-return-address while `foo` does, so in the binary +;; we should not see anything. +; CHECK-PROP-NOT: Properties: aarch64 feature: PAC diff --git a/llvm/test/Linker/link-arm-and-thumb.ll b/llvm/test/Linker/link-arm-and-thumb.ll index a90f212..b5984bf 100644 --- a/llvm/test/Linker/link-arm-and-thumb.ll +++ b/llvm/test/Linker/link-arm-and-thumb.ll @@ -13,11 +13,11 @@ entry: ret i32 %add } -; CHECK: define i32 @main() { +; CHECK: define i32 @main() ; CHECK: define i32 @foo(i32 %a, i32 %b) [[ARM_ATTRS:#[0-9]+]] ; CHECK: define i32 @bar(i32 %a, i32 %b) [[THUMB_ATTRS:#[0-9]+]] -; CHECK: attributes [[ARM_ATTRS]] = { "target-features"="-thumb-mode" } -; CHECK: attributes [[THUMB_ATTRS]] = { "target-features"="+thumb-mode" } +; CHECK: attributes [[ARM_ATTRS]] = {{{.*}}"target-features"="-thumb-mode" } +; CHECK: attributes [[THUMB_ATTRS]] = {{{.*}}"target-features"="+thumb-mode" } ; STDERR-NOT: warning: Linking two modules of different target triples: diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index d3b44eb..8160544 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp // GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, s4, v7, v8 -// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: 
[0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, v4, 0, 1 -// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] v_add_min_i32 v2, v4, 3, s2 -// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] v_add_min_i32 v2, s4, 4, v2 -// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] v_add_min_i32 v2, v4, v7, 12345 -// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04] v_add_max_i32 v2, s4, v7, v8 -// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] v_add_max_i32 v2, v4, 0, 1 -// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] v_add_max_i32 v2, v4, 3, s2 -// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] v_add_max_i32 v2, s4, 4, v2 -// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: 
[0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] v_add_max_i32 v2, v4, v7, 12345 -// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04] v_add_min_u32 v2, s4, v7, v8 -// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] v_add_min_u32 v2, v4, 0, 1 -// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] v_add_min_u32 v2, v4, 3, s2 -// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] v_add_min_u32 v2, s4, 4, v2 -// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] v_add_min_u32 v2, v4, v7, 12345 -// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04] v_add_max_u32 v2, s4, v7, v8 -// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] 
v_add_max_u32 v2, v4, 0, 1 -// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] v_add_max_u32 v2, v4, 3, s2 -// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] v_add_max_u32 v2, s4, 4, v2 -// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] v_add_max_u32 v2, v4, v7, 12345 -// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] v_cvt_pk_bf16_f32 v5, v1, v2 // GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index 98d07ac..d913bd2 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp // GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, s4, v7, v8 -// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, v4, 0, 1 -// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: 
[0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] v_add_min_i32 v2, v4, 3, s2 -// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] v_add_min_i32 v2, s4, 4, v2 -// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] v_add_min_i32 v2, v4, v7, 12345 -// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04] v_add_max_i32 v2, s4, v7, v8 -// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] v_add_max_i32 v2, v4, 0, 1 -// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] v_add_max_i32 v2, v4, 3, s2 -// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] v_add_max_i32 v2, s4, 4, v2 -// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] v_add_max_i32 v2, v4, v7, 12345 -// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; 
encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] v_add_min_u32 v2, s4, v7, v8 -// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] v_add_min_u32 v2, v4, 0, 1 -// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] v_add_min_u32 v2, v4, 3, s2 -// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] v_add_min_u32 v2, s4, 4, v2 -// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] v_add_min_u32 v2, v4, v7, 12345 -// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04] v_add_max_u32 v2, s4, v7, v8 -// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] v_add_max_u32 v2, v4, 0, 1 -// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] v_add_max_u32 v2, v4, 3, s2 -// 
GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] v_add_max_u32 v2, s4, 4, v2 -// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] v_add_max_u32 v2, v4, v7, 12345 -// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] v_cvt_pk_bf16_f32 v5, v1, v2 // GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt index 29bfa54..7af0bfe5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt @@ -237,64 +237,76 @@ # GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_min_i32 v2, v4, 
0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] + +0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04] 0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# 
GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] 0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04] 0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02 -# 
GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] 0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf # GFX1250: v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/RISCV/xsfvfexp.s b/llvm/test/MC/RISCV/xsfvfexp.s new file mode 100644 index 0000000..bd6aecd --- /dev/null +++ b/llvm/test/MC/RISCV/xsfvfexp.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+xsfvfexp32e %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexp32e %s \ +# RUN: | llvm-objdump -d --mattr=+xsfvfexp32e - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexp32e %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+xsfvfexp16e %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=riscv64 
-filetype=obj --mattr=+xsfvfexp16e %s \ +# RUN: | llvm-objdump -d --mattr=+xsfvfexp16e - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexp16e %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+zvfbfmin,+xsfvfbfexp16e %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+zvfbfmin,+xsfvfbfexp16e %s \ +# RUN: | llvm-objdump -d --mattr=+xsfvfbfexp16e - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+zvfbfmin,+xsfvfbfexp16e %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +sf.vfexp.v v2, v5, v0.t +# CHECK-INST: sf.vfexp.v v2, v5, v0.t +# CHECK-ENCODING: [0x57,0x91,0x53,0x4c] +# CHECK-ERROR: instruction requires the following: 'Xsfvfbfexp16e', 'Xsfvfexp16e', or 'Xsfvfexp32e' (SiFive Vector Floating-Point Exponential Function Instruction){{$}} +# CHECK-UNKNOWN: 4c539157 <unknown> diff --git a/llvm/test/MC/RISCV/xsfvfexpa.s b/llvm/test/MC/RISCV/xsfvfexpa.s new file mode 100644 index 0000000..317a103 --- /dev/null +++ b/llvm/test/MC/RISCV/xsfvfexpa.s @@ -0,0 +1,15 @@ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+xsfvfexpa %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexpa %s \ +# RUN: | llvm-objdump -d --mattr=+xsfvfexpa - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+xsfvfexpa %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +sf.vfexpa.v v2, v5, v0.t +# CHECK-INST: sf.vfexpa.v v2, v5, v0.t +# CHECK-ENCODING: [0x57,0x11,0x53,0x4c] +# CHECK-ERROR: instruction requires the following: 
'Xsfvfexpa' (SiFive Vector Floating-Point Exponential Approximation Instruction){{$}} +# CHECK-UNKNOWN: 4c531157 <unknown> diff --git a/llvm/test/ThinLTO/AArch64/aarch64_inline.ll b/llvm/test/ThinLTO/AArch64/aarch64_inline.ll new file mode 100644 index 0000000..401f66d --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/aarch64_inline.ll @@ -0,0 +1,86 @@ +;; Test verifies inlining happens cross module when module flags are upgraded. +;; `foo` and `main` are both old semantic while bar is the new semantic. +;; Regression test for #82763 + +; RUN: split-file %s %t +; RUN: opt -module-summary %t/foo.ll -o %t/foo.o +; RUN: opt -module-summary %t/bar.ll -o %t/bar.o +; RUN: opt -module-summary %t/main.ll -o %t/main.o +; RUN: llvm-lto2 run %t/main.o %t/foo.o %t/bar.o -save-temps \ +; RUN: -o %t/t.exe \ +; RUN: -r=%t/foo.o,foo,plx \ +; RUN: -r=%t/bar.o,bar,plx \ +; RUN: -r=%t/main.o,foo,l \ +; RUN: -r=%t/main.o,bar,l \ +; RUN: -r=%t/main.o,main,plx 2>&1 +; RUN: llvm-dis %t/t.exe.1.4.opt.bc -o - | FileCheck %s + +; CHECK: define dso_local noundef i32 @main() local_unnamed_addr #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 35 +; CHECK-NEXT: } + +; CHECK: attributes #0 = { {{.*}}"branch-target-enforcement" "sign-return-address"="all" "sign-return-address-key"="b_key" } + +; CHECK: !llvm.module.flags = !{!0, !1, !2, !3} + +; CHECK: !0 = !{i32 8, !"branch-target-enforcement", i32 2} +; CHECK: !1 = !{i32 8, !"sign-return-address", i32 2} +; CHECK: !2 = !{i32 8, !"sign-return-address-all", i32 2} +; CHECK: !3 = !{i32 8, !"sign-return-address-with-bkey", i32 2} + + +;--- foo.ll +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define dso_local noundef i32 @foo() local_unnamed_addr #0 { +entry: + ret i32 34 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } +!llvm.module.flags = !{!0, !1, !2, !3 } +!0 = !{i32 8, !"branch-target-enforcement", i32 1} +!1 = !{i32 8, 
!"sign-return-address", i32 1} +!2 = !{i32 8, !"sign-return-address-all", i32 1} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 1} + +;--- bar.ll +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define dso_local noundef i32 @bar() local_unnamed_addr #0 { +entry: + ret i32 1 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "branch-target-enforcement" "sign-return-address"="all" "sign-return-address-key"="b_key" } +!llvm.module.flags = !{!0, !1, !2, !3 } +!0 = !{i32 8, !"branch-target-enforcement", i32 2} +!1 = !{i32 8, !"sign-return-address", i32 2} +!2 = !{i32 8, !"sign-return-address-all", i32 2} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 2} + +;--- main.ll +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +declare i32 @foo(); +declare i32 @bar(); + +define i32 @main() #0 { +entry: + %1 = call i32 @foo() + %2 = call i32 @bar() + %3 = add i32 %1, %2 + ret i32 %3 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } + +!llvm.module.flags = !{!0, !1, !2, !3 } +!0 = !{i32 8, !"branch-target-enforcement", i32 1} +!1 = !{i32 8, !"sign-return-address", i32 1} +!2 = !{i32 8, !"sign-return-address-all", i32 1} +!3 = !{i32 8, !"sign-return-address-with-bkey", i32 1} diff --git a/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll b/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll index b3f2e81..15ce3e3 100644 --- a/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll +++ b/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll @@ -5,7 +5,7 @@ ; attributes that should be transferred only if it is on all of the regions. 
; This includes the attributes, no-nans-fp-math, -; no-signed-zeros-fp-math, less-precise-fpmad, unsafe-fp-math, and +; no-signed-zeros-fp-math, less-precise-fpmad, and ; no-infs-fp-math. Only when each instance of similarity has these attributes ; can we say that the outlined function can have these attributes since that ; is the more general case for these attributes. @@ -101,7 +101,7 @@ entry: } attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "less-precise-fpmad"="true" -"unsafe-fp-math"="true" "no-infs-fp-math"="true"} +"no-infs-fp-math"="true"} ; CHECK: define internal void @outlined_ir_func_0(ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) [[ATTR1:#[0-9]+]] { ; CHECK: entry_to_outline: @@ -122,5 +122,5 @@ attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "les ; CHECK-NEXT: [[CL:%.*]] = load i32, ptr [[ARG2]], align 4 -; CHECK: attributes [[ATTR1]] = { minsize optsize "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "unsafe-fp-math"="false" } -; CHECK: attributes [[ATTR]] = { minsize optsize "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "unsafe-fp-math"="true" } +; CHECK: attributes [[ATTR1]] = { minsize optsize "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" } +; CHECK: attributes [[ATTR]] = { minsize optsize "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" } diff --git a/llvm/test/Transforms/Inline/attributes.ll b/llvm/test/Transforms/Inline/attributes.ll index 55ab430..da7eeda 100644 --- a/llvm/test/Transforms/Inline/attributes.ll +++ b/llvm/test/Transforms/Inline/attributes.ll @@ -601,46 +601,6 @@ define i32 @test_no-signed-zeros-fp-math3(i32 %i) "no-signed-zeros-fp-math"="tru ; CHECK-NEXT: ret i32 } -define i32 @unsafe-fp-math_callee0(i32 
%i) "unsafe-fp-math"="false" { - ret i32 %i -; CHECK: @unsafe-fp-math_callee0(i32 %i) [[UNSAFE_FPMATH_FALSE:#[0-9]+]] { -; CHECK-NEXT: ret i32 -} - -define i32 @unsafe-fp-math_callee1(i32 %i) "unsafe-fp-math"="true" { - ret i32 %i -; CHECK: @unsafe-fp-math_callee1(i32 %i) [[UNSAFE_FPMATH_TRUE:#[0-9]+]] { -; CHECK-NEXT: ret i32 -} - -define i32 @test_unsafe-fp-math0(i32 %i) "unsafe-fp-math"="false" { - %1 = call i32 @unsafe-fp-math_callee0(i32 %i) - ret i32 %1 -; CHECK: @test_unsafe-fp-math0(i32 %i) [[UNSAFE_FPMATH_FALSE]] { -; CHECK-NEXT: ret i32 -} - -define i32 @test_unsafe-fp-math1(i32 %i) "unsafe-fp-math"="false" { - %1 = call i32 @unsafe-fp-math_callee1(i32 %i) - ret i32 %1 -; CHECK: @test_unsafe-fp-math1(i32 %i) [[UNSAFE_FPMATH_FALSE]] { -; CHECK-NEXT: ret i32 -} - -define i32 @test_unsafe-fp-math2(i32 %i) "unsafe-fp-math"="true" { - %1 = call i32 @unsafe-fp-math_callee0(i32 %i) - ret i32 %1 -; CHECK: @test_unsafe-fp-math2(i32 %i) [[UNSAFE_FPMATH_FALSE]] { -; CHECK-NEXT: ret i32 -} - -define i32 @test_unsafe-fp-math3(i32 %i) "unsafe-fp-math"="true" { - %1 = call i32 @unsafe-fp-math_callee1(i32 %i) - ret i32 %1 -; CHECK: @test_unsafe-fp-math3(i32 %i) [[UNSAFE_FPMATH_TRUE]] { -; CHECK-NEXT: ret i32 -} - ; Test that fn_ret_thunk_extern has no CompatRule; inlining is permitted. ; Test that fn_ret_thunk_extern has no MergeRule; fn_ret_thunk_extern is not ; propagated or dropped on the caller after inlining. 
@@ -693,6 +653,4 @@ define i32 @loader_replaceable_caller() { ; CHECK: attributes [[NO_NANS_FPMATH_TRUE]] = { "no-nans-fp-math"="true" } ; CHECK: attributes [[NO_SIGNED_ZEROS_FPMATH_FALSE]] = { "no-signed-zeros-fp-math"="false" } ; CHECK: attributes [[NO_SIGNED_ZEROS_FPMATH_TRUE]] = { "no-signed-zeros-fp-math"="true" } -; CHECK: attributes [[UNSAFE_FPMATH_FALSE]] = { "unsafe-fp-math"="false" } -; CHECK: attributes [[UNSAFE_FPMATH_TRUE]] = { "unsafe-fp-math"="true" } ; CHECK: attributes [[FNRETTHUNK_EXTERN]] = { fn_ret_thunk_extern } diff --git a/llvm/test/Transforms/InstCombine/select-and-or.ll b/llvm/test/Transforms/InstCombine/select-and-or.ll index 453ca66..0b8eda4 100644 --- a/llvm/test/Transforms/InstCombine/select-and-or.ll +++ b/llvm/test/Transforms/InstCombine/select-and-or.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes=instcombine < %s | FileCheck %s declare void @use(i1) @@ -6,6 +6,10 @@ declare i1 @gen_i1() declare <2 x i1> @gen_v2i1() ; Should not be converted to "and", which has different poison semantics. +;. +; CHECK: @g1 = external global i16 +; CHECK: @g2 = external global i16 +;. 
define i1 @logical_and(i1 %a, i1 %b) { ; CHECK-LABEL: @logical_and( ; CHECK-NEXT: [[RES:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false @@ -225,29 +229,29 @@ define i1 @not_not_true(i1 %x, i1 %y) { ; (!x && !y) --> !(x || y) -define i1 @not_not_false(i1 %x, i1 %y) { +define i1 @not_not_false(i1 %x, i1 %y) !prof !0 { ; CHECK-LABEL: @not_not_false( -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 true, i1 [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 true, i1 [[Y:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[R:%.*]] = xor i1 [[TMP1]], true ; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i1 %x, true %noty = xor i1 %y, true - %r = select i1 %notx, i1 %noty, i1 false + %r = select i1 %notx, i1 %noty, i1 false, !prof !1 ret i1 %r } ; (!x || !y) --> !(x && y) -define i1 @not_true_not(i1 %x, i1 %y) { +define i1 @not_true_not(i1 %x, i1 %y) !prof !0 { ; CHECK-LABEL: @not_true_not( -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 [[Y:%.*]], i1 false +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 [[Y:%.*]], i1 false, !prof [[PROF1]] ; CHECK-NEXT: [[R:%.*]] = xor i1 [[TMP1]], true ; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i1 %x, true %noty = xor i1 %y, true - %r = select i1 %notx, i1 true, i1 %noty + %r = select i1 %notx, i1 true, i1 %noty, !prof !1 ret i1 %r } @@ -1348,3 +1352,12 @@ define i8 @test_logical_commuted_and_ne_a_b(i1 %other_cond, i8 %a, i8 %b) { %select = select i1 %or.cond, i8 %a, i8 %b ret i8 %select } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 2} +;. 
diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index d88eaf8..3d97048 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -58,15 +58,15 @@ define i1 @cond_eq_or_const(i8 %X, i8 %Y) !prof !0 { ret i1 %res } -define i1 @xor_and(i1 %c, i32 %X, i32 %Y) { +define i1 @xor_and(i1 %c, i32 %X, i32 %Y) !prof !0 { ; CHECK-LABEL: @xor_and( ; CHECK-NEXT: [[COMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 true, i1 [[COMP]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 true, i1 [[COMP]], !prof [[PROF2:![0-9]+]] ; CHECK-NEXT: ret i1 [[SEL]] ; %comp = icmp ult i32 %X, %Y - %sel = select i1 %c, i1 %comp, i1 false + %sel = select i1 %c, i1 %comp, i1 false, !prof !1 %res = xor i1 %sel, true ret i1 %res } @@ -97,15 +97,15 @@ define <2 x i1> @xor_and3(<2 x i1> %c, <2 x i32> %X, <2 x i32> %Y) { ret <2 x i1> %res } -define i1 @xor_or(i1 %c, i32 %X, i32 %Y) { +define i1 @xor_or(i1 %c, i32 %X, i32 %Y) !prof !0 { ; CHECK-LABEL: @xor_or( ; CHECK-NEXT: [[COMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 [[COMP]], i1 false +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 [[COMP]], i1 false, !prof [[PROF2]] ; CHECK-NEXT: ret i1 [[SEL]] ; %comp = icmp ult i32 %X, %Y - %sel = select i1 %c, i1 true, i1 %comp + %sel = select i1 %c, i1 true, i1 %comp, !prof !1 %res = xor i1 %sel, true ret i1 %res } @@ -802,4 +802,5 @@ define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) { ;. ; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 2} ;. 
diff --git a/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll new file mode 100644 index 0000000..7816781 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes=instcombine | FileCheck %s +@A = extern_weak global float, align 4 + +; %same.as.v1 is a select with two phis %v1 and %phi.to.remove as the true +; and false values, while %v1 and %phi.to.remove are actually the same. +; Fold the selection instruction %same.as.v1 to %v1. +define void @select_with_identical_phi(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], 
i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %sub, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; %phi.to.remove.next and %v1.1 are swapped. +; Check that %same.as.v1 can be folded. 
+define void @select_with_identical_phi_2(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, 
%for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %v1, float %sub + %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; same.as.v1 are swapped. +; Check that %same.as.v1 can be folded. +define void @select_with_identical_phi_3(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: 
[[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %sub, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; %same.as.v1, %phi.to.remove.next and %v1.1 are swapped. +; Check that %same.as.v1 can be folded. 
+define void @select_with_identical_phi_4(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, 
%for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %v1, float %sub + %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} diff --git a/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll b/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll index 0f18dc2..46e38d9 100644 --- a/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll +++ b/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -enable-unsafe-fp-math -S >%t +; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -S >%t ; RUN: FileCheck %s < %t ; ModuleID = 't.cc' diff --git a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll index c7bc43e1..b61d659 100644 --- a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll +++ b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=powerpc64le-unknown-linux-gnu -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -enable-unsafe-fp-math -S | \ +; 
RUN: opt < %s -mtriple=powerpc64le-unknown-linux-gnu -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -S | \ ; RUN: FileCheck %s ; This case is copied from test/Transforms/SimplifyCFG/AArch64/ diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 8b03db3..152f7db 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -172,6 +172,11 @@ static cl::opt<bool> cl::desc("Print MIR2Vec vocabulary contents"), cl::init(false)); +static cl::opt<bool> + PrintMIR2Vec("print-mir2vec", cl::Hidden, + cl::desc("Print MIR2Vec embeddings for functions"), + cl::init(false)); + static cl::list<std::string> IncludeDirs("I", cl::desc("include search path")); static cl::opt<bool> RemarksWithHotness( @@ -775,6 +780,11 @@ static int compileModule(char **argv, LLVMContext &Context) { PM.add(createMIR2VecVocabPrinterLegacyPass(errs())); } + // Add MIR2Vec printer if requested + if (PrintMIR2Vec) { + PM.add(createMIR2VecPrinterLegacyPass(errs())); + } + PM.add(createFreeMachineFunctionPass()); } else { if (Target->addPassesToEmitFile(PM, *OS, DwoOut ? 
&DwoOut->os() : nullptr, @@ -788,6 +798,11 @@ static int compileModule(char **argv, LLVMContext &Context) { if (PrintMIR2VecVocab) { PM.add(createMIR2VecVocabPrinterLegacyPass(errs())); } + + // Add MIR2Vec printer if requested + if (PrintMIR2Vec) { + PM.add(createMIR2VecPrinterLegacyPass(errs())); + } } Target->getObjFileLowering()->Initialize(MMIWP->getMMI().getContext(), diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index 79f0527..11eb58e 100644 --- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -202,9 +202,10 @@ static alias IgnoreCaseAlias("i", desc("Alias for --ignore-case."), aliasopt(IgnoreCase), cl::NotHidden); static list<std::string> Name( "name", - desc("Find and print all debug info entries whose name (DW_AT_name " - "attribute) matches the exact text in <pattern>. When used with the " - "the -regex option <pattern> is interpreted as a regular expression."), + desc("Find and print all debug info entries whose name " + "(DW_AT_name/DW_AT_linkage_name attribute) matches the exact text " + "in <pattern>. 
When used with the -regex option <pattern> is " + "interpreted as a regular expression."), value_desc("pattern"), cat(DwarfDumpCategory)); static alias NameAlias("n", desc("Alias for --name"), aliasopt(Name), cl::NotHidden); diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index 47469983..966b1f0 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -27,6 +28,7 @@ using namespace llvm; using testing::ElementsAre; +using testing::ElementsAreArray; using testing::UnorderedElementsAre; namespace { @@ -772,48 +774,30 @@ TEST(STLExtrasTest, DropBeginTest) { SmallVector<int, 5> vec{0, 1, 2, 3, 4}; for (int n = 0; n < 5; ++n) { - int i = n; - for (auto &v : drop_begin(vec, n)) { - EXPECT_EQ(v, i); - i += 1; - } - EXPECT_EQ(i, 5); + EXPECT_THAT(drop_begin(vec, n), + ElementsAreArray(ArrayRef(&vec[n], vec.size() - n))); } } TEST(STLExtrasTest, DropBeginDefaultTest) { SmallVector<int, 5> vec{0, 1, 2, 3, 4}; - int i = 1; - for (auto &v : drop_begin(vec)) { - EXPECT_EQ(v, i); - i += 1; - } - EXPECT_EQ(i, 5); + EXPECT_THAT(drop_begin(vec), ElementsAre(1, 2, 3, 4)); } TEST(STLExtrasTest, DropEndTest) { SmallVector<int, 5> vec{0, 1, 2, 3, 4}; for (int n = 0; n < 5; ++n) { - int i = 0; - for (auto &v : drop_end(vec, n)) { - EXPECT_EQ(v, i); - i += 1; - } - EXPECT_EQ(i, 5 - n); + EXPECT_THAT(drop_end(vec, n), + ElementsAreArray(ArrayRef(vec.data(), vec.size() - n))); } } TEST(STLExtrasTest, DropEndDefaultTest) { SmallVector<int, 5> vec{0, 1, 2, 3, 4}; - int i = 0; - for (auto &v : drop_end(vec)) { - EXPECT_EQ(v, i); - i += 1; - } - EXPECT_EQ(i, 4); + EXPECT_THAT(drop_end(vec), ElementsAre(0, 1, 2, 3)); } TEST(STLExtrasTest, MapRangeTest) { diff --git 
a/llvm/unittests/CAS/OnDiskCommonUtils.h b/llvm/unittests/CAS/OnDiskCommonUtils.h index 57c8c22..89f93e0 100644 --- a/llvm/unittests/CAS/OnDiskCommonUtils.h +++ b/llvm/unittests/CAS/OnDiskCommonUtils.h @@ -45,7 +45,7 @@ inline HashType digest(StringRef Data) { } inline ValueType valueFromString(StringRef S) { - ValueType Val; + ValueType Val = {}; llvm::copy(S.substr(0, sizeof(Val)), Val.data()); return Val; } diff --git a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp index 58f5dcc6..3c2e963 100644 --- a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp +++ b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp @@ -283,7 +283,7 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInPolicyConflict) { OnDiskGraphDB::FaultInPolicy::SingleNode); } -#if defined(EXPENSIVE_CHECKS) +#if defined(EXPENSIVE_CHECKS) && !defined(_WIN32) TEST_F(OnDiskCASTest, OnDiskGraphDBSpaceLimit) { setMaxOnDiskCASMappingSize(); unittest::TempDir Temp("ondiskcas", /*Unique=*/true); diff --git a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp index 89c03b8..41512d0 100644 --- a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp +++ b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp @@ -33,13 +33,13 @@ TEST_F(OnDiskCASTest, OnDiskKeyValueDBTest) { } ValueType ValW = valueFromString("world"); - ArrayRef<char> Val; + std::optional<ArrayRef<char>> Val; ASSERT_THAT_ERROR(DB->put(digest("hello"), ValW).moveInto(Val), Succeeded()); - EXPECT_EQ(Val, ArrayRef(ValW)); + EXPECT_EQ(*Val, ArrayRef(ValW)); ASSERT_THAT_ERROR( DB->put(digest("hello"), valueFromString("other")).moveInto(Val), Succeeded()); - EXPECT_EQ(Val, ArrayRef(ValW)); + EXPECT_EQ(*Val, ArrayRef(ValW)); { std::optional<ArrayRef<char>> Val; @@ -65,7 +65,7 @@ TEST_F(OnDiskCASTest, OnDiskKeyValueDBTest) { // Insert a lot of entries. 
for (unsigned I = 0; I < 1024 * 100; ++I) { std::string Index = Twine(I).str(); - ArrayRef<char> Val; + std::optional<ArrayRef<char>> Val; ASSERT_THAT_ERROR( DB->put(digest(Index), valueFromString(Index)).moveInto(Val), Succeeded()); diff --git a/llvm/unittests/CodeGen/MIR2VecTest.cpp b/llvm/unittests/CodeGen/MIR2VecTest.cpp index 11222b4..8710d6b 100644 --- a/llvm/unittests/CodeGen/MIR2VecTest.cpp +++ b/llvm/unittests/CodeGen/MIR2VecTest.cpp @@ -82,6 +82,9 @@ protected: return; } + // Set the data layout to match the target machine + M->setDataLayout(TM->createDataLayout()); + // Create a dummy function to get subtarget info FunctionType *FT = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *F = @@ -96,16 +99,27 @@ protected: } void TearDown() override { TII = nullptr; } -}; -// Function to find an opcode by name -static int findOpcodeByName(const TargetInstrInfo *TII, StringRef Name) { - for (unsigned Opcode = 1; Opcode < TII->getNumOpcodes(); ++Opcode) { - if (TII->getName(Opcode) == Name) - return Opcode; + // Find an opcode by name + int findOpcodeByName(StringRef Name) { + for (unsigned Opcode = 1; Opcode < TII->getNumOpcodes(); ++Opcode) { + if (TII->getName(Opcode) == Name) + return Opcode; + } + return -1; // Not found } - return -1; // Not found -} + + // Create a vocabulary with specific opcodes and embeddings + Expected<MIRVocabulary> + createTestVocab(std::initializer_list<std::pair<const char *, float>> opcodes, + unsigned dimension = 2) { + assert(TII && "TargetInstrInfo not initialized"); + VocabMap VMap; + for (const auto &[name, value] : opcodes) + VMap[name] = Embedding(dimension, value); + return MIRVocabulary::create(std::move(VMap), *TII); + } +}; TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) { // Test that same base opcodes get same canonical indices @@ -118,10 +132,8 @@ TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) { // Create a MIRVocabulary instance to test the mapping // Use a minimal MIRVocabulary to 
trigger canonical mapping construction - VocabMap VMap; Embedding Val = Embedding(64, 1.0f); - VMap["ADD"] = Val; - auto TestVocabOrErr = MIRVocabulary::create(std::move(VMap), *TII); + auto TestVocabOrErr = createTestVocab({{"ADD", 1.0f}}, 64); ASSERT_TRUE(static_cast<bool>(TestVocabOrErr)) << "Failed to create vocabulary: " << toString(TestVocabOrErr.takeError()); @@ -156,16 +168,16 @@ TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) { 6880u); // X86 has >6880 unique base opcodes // Check that the embeddings for opcodes not in the vocab are zero vectors - int Add32rrOpcode = findOpcodeByName(TII, "ADD32rr"); + int Add32rrOpcode = findOpcodeByName("ADD32rr"); ASSERT_NE(Add32rrOpcode, -1) << "ADD32rr opcode not found"; EXPECT_TRUE(TestVocab[Add32rrOpcode].approximatelyEquals(Val)); - int Sub32rrOpcode = findOpcodeByName(TII, "SUB32rr"); + int Sub32rrOpcode = findOpcodeByName("SUB32rr"); ASSERT_NE(Sub32rrOpcode, -1) << "SUB32rr opcode not found"; EXPECT_TRUE( TestVocab[Sub32rrOpcode].approximatelyEquals(Embedding(64, 0.0f))); - int Mov32rrOpcode = findOpcodeByName(TII, "MOV32rr"); + int Mov32rrOpcode = findOpcodeByName("MOV32rr"); ASSERT_NE(Mov32rrOpcode, -1) << "MOV32rr opcode not found"; EXPECT_TRUE( TestVocab[Mov32rrOpcode].approximatelyEquals(Embedding(64, 0.0f))); @@ -178,9 +190,7 @@ TEST_F(MIR2VecVocabTestFixture, DeterministicMapping) { // Create a MIRVocabulary instance to test deterministic mapping // Use a minimal MIRVocabulary to trigger canonical mapping construction - VocabMap VMap; - VMap["ADD"] = Embedding(64, 1.0f); - auto TestVocabOrErr = MIRVocabulary::create(std::move(VMap), *TII); + auto TestVocabOrErr = createTestVocab({{"ADD", 1.0f}}, 64); ASSERT_TRUE(static_cast<bool>(TestVocabOrErr)) << "Failed to create vocabulary: " << toString(TestVocabOrErr.takeError()); @@ -189,8 +199,6 @@ TEST_F(MIR2VecVocabTestFixture, DeterministicMapping) { unsigned Index1 = TestVocab.getCanonicalIndexForBaseName(BaseName); unsigned Index2 = 
TestVocab.getCanonicalIndexForBaseName(BaseName); unsigned Index3 = TestVocab.getCanonicalIndexForBaseName(BaseName); - - EXPECT_EQ(Index1, Index2); EXPECT_EQ(Index2, Index3); // Test across multiple runs @@ -202,11 +210,7 @@ TEST_F(MIR2VecVocabTestFixture, DeterministicMapping) { // Test MIRVocabulary construction TEST_F(MIR2VecVocabTestFixture, VocabularyConstruction) { - VocabMap VMap; - VMap["ADD"] = Embedding(128, 1.0f); // Dimension 128, all values 1.0 - VMap["SUB"] = Embedding(128, 2.0f); // Dimension 128, all values 2.0 - - auto VocabOrErr = MIRVocabulary::create(std::move(VMap), *TII); + auto VocabOrErr = createTestVocab({{"ADD", 1.0f}, {"SUB", 2.0f}}, 128); ASSERT_TRUE(static_cast<bool>(VocabOrErr)) << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); auto &Vocab = *VocabOrErr; @@ -243,4 +247,251 @@ TEST_F(MIR2VecVocabTestFixture, EmptyVocabularyCreation) { } } +// Fixture for embedding related tests +class MIR2VecEmbeddingTestFixture : public MIR2VecVocabTestFixture { +protected: + std::unique_ptr<MachineModuleInfo> MMI; + MachineFunction *MF = nullptr; + + void SetUp() override { + MIR2VecVocabTestFixture::SetUp(); + // If base class setup was skipped (TII not initialized), skip derived setup + if (!TII) + GTEST_SKIP() << "Failed to get target instruction info in " + "the base class setup; Skipping test"; + + // Create a dummy function for MachineFunction + FunctionType *FT = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = + Function::Create(FT, Function::ExternalLinkage, "test", M.get()); + + MMI = std::make_unique<MachineModuleInfo>(TM.get()); + MF = &MMI->getOrCreateMachineFunction(*F); + } + + void TearDown() override { MIR2VecVocabTestFixture::TearDown(); } + + // Create a machine instruction + MachineInstr *createMachineInstr(MachineBasicBlock &MBB, unsigned Opcode) { + const MCInstrDesc &Desc = TII->get(Opcode); + // Create instruction - operands don't affect opcode-based embeddings + MachineInstr *MI = 
BuildMI(MBB, MBB.end(), DebugLoc(), Desc); + return MI; + } + + MachineInstr *createMachineInstr(MachineBasicBlock &MBB, + const char *OpcodeName) { + int Opcode = findOpcodeByName(OpcodeName); + if (Opcode == -1) + return nullptr; + return createMachineInstr(MBB, Opcode); + } + + void createMachineInstrs(MachineBasicBlock &MBB, + std::initializer_list<const char *> Opcodes) { + for (const char *OpcodeName : Opcodes) { + MachineInstr *MI = createMachineInstr(MBB, OpcodeName); + ASSERT_TRUE(MI != nullptr); + } + } +}; + +// Test factory method for creating embedder +TEST_F(MIR2VecEmbeddingTestFixture, CreateSymbolicEmbedder) { + auto VocabOrErr = MIRVocabulary::createDummyVocabForTest(*TII, 1); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &V = *VocabOrErr; + auto Emb = MIREmbedder::create(MIR2VecKind::Symbolic, *MF, V); + EXPECT_NE(Emb, nullptr); +} + +TEST_F(MIR2VecEmbeddingTestFixture, CreateInvalidMode) { + auto VocabOrErr = MIRVocabulary::createDummyVocabForTest(*TII, 1); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &V = *VocabOrErr; + auto Result = MIREmbedder::create(static_cast<MIR2VecKind>(-1), *MF, V); + EXPECT_FALSE(static_cast<bool>(Result)); +} + +// Test SymbolicMIREmbedder with simple target opcodes +TEST_F(MIR2VecEmbeddingTestFixture, TestSymbolicEmbedder) { + // Create a test vocabulary with specific values + auto VocabOrErr = createTestVocab( + { + {"NOOP", 1.0f}, // [1.0, 1.0, 1.0, 1.0] + {"RET", 2.0f}, // [2.0, 2.0, 2.0, 2.0] + {"TRAP", 3.0f} // [3.0, 3.0, 3.0, 3.0] + }, + 4); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &Vocab = *VocabOrErr; + // Create a basic block using fixture's MF + MachineBasicBlock *MBB = MF->CreateMachineBasicBlock(); + MF->push_back(MBB); + + // Use real X86 opcodes that should 
exist and not be pseudo + auto NoopInst = createMachineInstr(*MBB, "NOOP"); + ASSERT_TRUE(NoopInst != nullptr); + + auto RetInst = createMachineInstr(*MBB, "RET64"); + ASSERT_TRUE(RetInst != nullptr); + + auto TrapInst = createMachineInstr(*MBB, "TRAP"); + ASSERT_TRUE(TrapInst != nullptr); + + // Verify these are not pseudo instructions + ASSERT_FALSE(NoopInst->isPseudo()) << "NOOP is marked as pseudo instruction"; + ASSERT_FALSE(RetInst->isPseudo()) << "RET is marked as pseudo instruction"; + ASSERT_FALSE(TrapInst->isPseudo()) << "TRAP is marked as pseudo instruction"; + + // Create embedder + auto Embedder = SymbolicMIREmbedder::create(*MF, Vocab); + ASSERT_TRUE(Embedder != nullptr); + + // Test instruction embeddings + auto NoopEmb = Embedder->getMInstVector(*NoopInst); + auto RetEmb = Embedder->getMInstVector(*RetInst); + auto TrapEmb = Embedder->getMInstVector(*TrapInst); + + // Verify embeddings match expected values (accounting for weight scaling) + float ExpectedWeight = mir2vec::OpcWeight; // Global weight from command line + EXPECT_TRUE(NoopEmb.approximatelyEquals(Embedding(4, 1.0f * ExpectedWeight))); + EXPECT_TRUE(RetEmb.approximatelyEquals(Embedding(4, 2.0f * ExpectedWeight))); + EXPECT_TRUE(TrapEmb.approximatelyEquals(Embedding(4, 3.0f * ExpectedWeight))); + + // Test basic block embedding (should be sum of instruction embeddings) + auto MBBVector = Embedder->getMBBVector(*MBB); + + // Expected BB vector: NOOP + RET + TRAP = [1+2+3, 1+2+3, 1+2+3, 1+2+3] * + // weight = [6, 6, 6, 6] * weight + Embedding ExpectedMBBVector(4, 6.0f * ExpectedWeight); + EXPECT_TRUE(MBBVector.approximatelyEquals(ExpectedMBBVector)); + + // Test function embedding (should equal MBB embedding since we have one MBB) + auto MFuncVector = Embedder->getMFunctionVector(); + EXPECT_TRUE(MFuncVector.approximatelyEquals(ExpectedMBBVector)); +} + +// Test embedder with multiple basic blocks +TEST_F(MIR2VecEmbeddingTestFixture, MultipleBasicBlocks) { + // Create a test vocabulary + 
auto VocabOrErr = createTestVocab({{"NOOP", 1.0f}, {"TRAP", 2.0f}}); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &Vocab = *VocabOrErr; + + // Create two basic blocks using fixture's MF + MachineBasicBlock *MBB1 = MF->CreateMachineBasicBlock(); + MachineBasicBlock *MBB2 = MF->CreateMachineBasicBlock(); + MF->push_back(MBB1); + MF->push_back(MBB2); + + createMachineInstrs(*MBB1, {"NOOP", "NOOP"}); + createMachineInstr(*MBB2, "TRAP"); + + // Create embedder + auto Embedder = SymbolicMIREmbedder::create(*MF, Vocab); + ASSERT_TRUE(Embedder != nullptr); + + // Test basic block embeddings + auto MBB1Vector = Embedder->getMBBVector(*MBB1); + auto MBB2Vector = Embedder->getMBBVector(*MBB2); + + float ExpectedWeight = mir2vec::OpcWeight; + // BB1: NOOP + NOOP = 2 * ([1, 1] * weight) + Embedding ExpectedMBB1Vector(2, 2.0f * ExpectedWeight); + EXPECT_TRUE(MBB1Vector.approximatelyEquals(ExpectedMBB1Vector)); + + // BB2: TRAP = [2, 2] * weight + Embedding ExpectedMBB2Vector(2, 2.0f * ExpectedWeight); + EXPECT_TRUE(MBB2Vector.approximatelyEquals(ExpectedMBB2Vector)); + + // Function embedding: BB1 + BB2 = [2+2, 2+2] * weight = [4, 4] * weight + // Function embedding should be just the first BB embedding as the second BB + // is unreachable + auto MFuncVector = Embedder->getMFunctionVector(); + EXPECT_TRUE(MFuncVector.approximatelyEquals(ExpectedMBB1Vector)); + + // Add a branch from BB1 to BB2 to make both reachable; now function embedding + // should be MBB1 + MBB2 + MBB1->addSuccessor(MBB2); + auto NewMFuncVector = Embedder->getMFunctionVector(); // Recompute embeddings + Embedding ExpectedFuncVector = MBB1Vector + MBB2Vector; + EXPECT_TRUE(NewMFuncVector.approximatelyEquals(ExpectedFuncVector)); +} + +// Test embedder with empty basic block +TEST_F(MIR2VecEmbeddingTestFixture, EmptyBasicBlock) { + + // Create an empty basic block + MachineBasicBlock *MBB = MF->CreateMachineBasicBlock(); + 
MF->push_back(MBB); + + // Create embedder + auto VocabOrErr = MIRVocabulary::createDummyVocabForTest(*TII, 2); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &V = *VocabOrErr; + auto Embedder = SymbolicMIREmbedder::create(*MF, V); + ASSERT_TRUE(Embedder != nullptr); + + // Test that empty BB has zero embedding + auto MBBVector = Embedder->getMBBVector(*MBB); + Embedding ExpectedBBVector(2, 0.0f); + EXPECT_TRUE(MBBVector.approximatelyEquals(ExpectedBBVector)); + + // Function embedding should also be zero + auto MFuncVector = Embedder->getMFunctionVector(); + EXPECT_TRUE(MFuncVector.approximatelyEquals(ExpectedBBVector)); +} + +// Test embedder with opcodes not in vocabulary +TEST_F(MIR2VecEmbeddingTestFixture, UnknownOpcodes) { + // Create a test vocabulary with limited entries + // SUB is intentionally not included + auto VocabOrErr = createTestVocab({{"ADD", 1.0f}}); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &Vocab = *VocabOrErr; + + // Create a basic block + MachineBasicBlock *MBB = MF->CreateMachineBasicBlock(); + MF->push_back(MBB); + + // Find opcodes + int AddOpcode = findOpcodeByName("ADD32rr"); + int SubOpcode = findOpcodeByName("SUB32rr"); + + ASSERT_NE(AddOpcode, -1) << "ADD32rr opcode not found"; + ASSERT_NE(SubOpcode, -1) << "SUB32rr opcode not found"; + + // Create instructions + MachineInstr *AddInstr = createMachineInstr(*MBB, AddOpcode); + MachineInstr *SubInstr = createMachineInstr(*MBB, SubOpcode); + + // Create embedder + auto Embedder = SymbolicMIREmbedder::create(*MF, Vocab); + ASSERT_TRUE(Embedder != nullptr); + + // Test instruction embeddings + auto AddVector = Embedder->getMInstVector(*AddInstr); + auto SubVector = Embedder->getMInstVector(*SubInstr); + + float ExpectedWeight = mir2vec::OpcWeight; + // ADD should have the embedding from vocabulary + EXPECT_TRUE( + 
AddVector.approximatelyEquals(Embedding(2, 1.0f * ExpectedWeight))); + + // SUB should have zero embedding (not in vocabulary) + EXPECT_TRUE(SubVector.approximatelyEquals(Embedding(2, 0.0f))); + + // Basic block embedding should be ADD + SUB = [1.0, 1.0] * weight + [0.0, + // 0.0] = [1.0, 1.0] * weight + const auto &MBBVector = Embedder->getMBBVector(*MBB); + Embedding ExpectedBBVector(2, 1.0f * ExpectedWeight); + EXPECT_TRUE(MBBVector.approximatelyEquals(ExpectedBBVector)); +} } // namespace diff --git a/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp b/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp index b988a78a..08b4e8f 100644 --- a/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp @@ -54,7 +54,9 @@ protected: return ContainerElementsMap(); ContainerElementsMap Result = SNs[0]->defs(); +#ifndef NDEBUG const ContainerElementsMap &Deps = SNs[0]->deps(); +#endif // NDEBUG for (size_t I = 1; I != SNs.size(); ++I) { assert(!DepsMustMatch || SNs[I]->deps() == Deps); diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 5d69a31..bfc1275 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -730,6 +730,11 @@ TEST(ParseArchString, MissingDepency) { EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), ""); } + + EXPECT_EQ(toString(RISCVISAInfo::parseArchString("rv64i_xsfvfbfexp16e", true) + .takeError()), + "'xsfvfbfexp16e' requires 'zvfbfmin' or 'zvfbfa' extension to also " + "be specified"); } TEST(ParseArchString, RejectsUnrecognizedProfileNames) { @@ -1162,6 +1167,11 @@ R"(All available -march extensions for RISC-V xsfmm64t 0.6 xsfmmbase 0.6 xsfvcp 1.0 + xsfvfbfexp16e 0.5 + xsfvfexp16e 0.5 + xsfvfexp32e 0.5 + xsfvfexpa 0.2 + xsfvfexpa64e 0.2 xsfvfnrclipxfqf 1.0 xsfvfwmaccqqq 1.0 xsfvqmaccdod 1.0 diff --git 
a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp index 2a0f500..e108c4d 100644 --- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp @@ -33,7 +33,7 @@ TEST_F(VPDominatorTreeTest, DominanceNoRegionsTest) { VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("VPBB2"); VPBasicBlock *VPBB3 = Plan.createVPBasicBlock("VPBB3"); VPBasicBlock *VPBB4 = Plan.createVPBasicBlock("VPBB4"); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB1, VPBB4); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB1, VPBB4); VPBB2->setParent(R1); VPBB3->setParent(R1); @@ -99,7 +99,7 @@ TEST_F(VPDominatorTreeTest, DominanceRegionsTest) { VPBasicBlock *R1BB2 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB3 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB4 = Plan.createVPBasicBlock(""); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB4, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB4); R1BB2->setParent(R1); R1BB3->setParent(R1); VPBlockUtils::connectBlocks(VPBB0, R1); @@ -112,7 +112,7 @@ TEST_F(VPDominatorTreeTest, DominanceRegionsTest) { VPBasicBlock *R2BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R1, R2); @@ -171,12 +171,12 @@ TEST_F(VPDominatorTreeTest, DominanceRegionsTest) { VPBasicBlock *R1BB1 = Plan.createVPBasicBlock("R1BB1"); VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("R1BB2"); VPBasicBlock *R1BB3 = Plan.createVPBasicBlock("R1BB3"); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB3, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB3); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1"); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("R2BB2"); VPBasicBlock 
*R2BB3 = Plan.createVPBasicBlock("R2BB#"); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB3, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB3); R2BB2->setParent(R2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB2, R2BB1); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index db64c75..c1791dfa 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -269,7 +269,7 @@ TEST_F(VPBasicBlockTest, getPlan) { // VPBasicBlock is the entry into the VPlan, followed by a region. VPBasicBlock *R1BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB2); VPBlockUtils::connectBlocks(R1BB1, R1BB2); VPBlockUtils::connectBlocks(VPBB1, R1); @@ -286,12 +286,12 @@ TEST_F(VPBasicBlockTest, getPlan) { VPlan &Plan = getPlan(); VPBasicBlock *R1BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB2); VPBlockUtils::connectBlocks(R1BB1, R1BB2); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBasicBlock *VPBB1 = Plan.getEntry(); @@ -369,7 +369,7 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { VPBasicBlock *R1BB2 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB3 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB4 = Plan.createVPBasicBlock(""); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB4, "R1"); + VPRegionBlock *R1 = 
Plan.createLoopRegion("R1", R1BB1, R1BB4); R1BB2->setParent(R1); R1BB3->setParent(R1); VPBlockUtils::connectBlocks(VPBB0, R1); @@ -382,7 +382,7 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { VPBasicBlock *R2BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R1, R2); @@ -467,12 +467,12 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { VPBasicBlock *R1BB1 = Plan.createVPBasicBlock("R1BB1"); VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("R1BB2"); VPBasicBlock *R1BB3 = Plan.createVPBasicBlock("R1BB3"); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB3, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB3); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1"); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("R2BB2"); VPBasicBlock *R2BB3 = Plan.createVPBasicBlock("R2BB3"); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB3, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB3); R2BB2->setParent(R2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB2, R2BB1); @@ -537,10 +537,10 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { VPlan &Plan = getPlan(); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1"); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("R2BB2"); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R2, R2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R2, R2); R2->setParent(R1); VPBasicBlock *VPBB1 = Plan.getEntry(); @@ -590,14 +590,14 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { // VPlan &Plan = getPlan(); VPBasicBlock *R3BB1 = 
Plan.createVPBasicBlock("R3BB1"); - VPRegionBlock *R3 = Plan.createVPRegionBlock(R3BB1, R3BB1, "R3"); + VPRegionBlock *R3 = Plan.createLoopRegion("R3", R3BB1, R3BB1); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1"); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R3, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R3); R3->setParent(R2); VPBlockUtils::connectBlocks(R2BB1, R3); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R2, R2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R2, R2); R2->setParent(R1); VPBasicBlock *VPBB1 = Plan.getEntry(); @@ -687,7 +687,7 @@ TEST_F(VPBasicBlockTest, reassociateBlocks) { VPlan &Plan = getPlan(); VPBasicBlock *VPBB1 = Plan.createVPBasicBlock("VPBB1"); VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("VPBB2"); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); auto *WidenPhi = new VPWidenPHIRecipe(nullptr); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp index c2f045b..50ad4d5 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -32,7 +32,7 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefSameBB) { VPBasicBlock *VPBB2 = Plan.createVPBasicBlock(""); VPBB2->appendRecipe(CanIV); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader()); @@ -71,7 +71,7 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) { VPBB2->appendRecipe(DefI); VPBB2->appendRecipe(BranchOnCond); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, 
R1); VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader()); @@ -117,7 +117,7 @@ TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) { VPBlockUtils::connectBlocks(VPBB2, VPBB3); VPBlockUtils::connectBlocks(VPBB3, VPBB4); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB4, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB4); VPBlockUtils::connectBlocks(VPBB1, R1); VPBB3->setParent(R1); @@ -160,7 +160,7 @@ TEST_F(VPVerifierTest, VPPhiIncomingValueDoesntDominateIncomingBlock) { auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {}); VPBB3->appendRecipe(CanIV); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB3, VPBB3, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB3, VPBB3); VPBlockUtils::connectBlocks(VPBB1, VPBB2); VPBlockUtils::connectBlocks(VPBB2, R1); VPBlockUtils::connectBlocks(VPBB4, Plan.getScalarHeader()); @@ -200,7 +200,7 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsOutsideRegion) { VPBB2->appendRecipe(CanIV); VPBB2->appendRecipe(BranchOnCond); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(VPBB1, R1); @@ -237,7 +237,7 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsInsideRegion) { VPBlockUtils::connectBlocks(VPBB2, VPBB3); VPBlockUtils::connectBlocks(VPBB2, VPBB3); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB3, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB3); VPBlockUtils::connectBlocks(VPBB1, R1); VPBB3->setParent(R1); @@ -270,7 +270,7 @@ TEST_F(VPVerifierTest, BlockOutsideRegionWithParent) { VPBB1->appendRecipe(DefI); VPBB2->appendRecipe(BranchOnCond); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader()); @@ -302,7 +302,7 @@ TEST_F(VPVerifierTest, 
NonHeaderPHIInHeader) { VPBB2->appendRecipe(IRPhi); VPBB2->appendRecipe(BranchOnCond); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader()); diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index d1b14fb..0b90f91 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -285,7 +285,7 @@ emitGetNamedOperandIdx(raw_ostream &OS, static void emitGetOperandIdxName(raw_ostream &OS, - MapVector<StringRef, unsigned> OperandNameToID, + const MapVector<StringRef, unsigned> &OperandNameToID, const MapVector<SmallVector<int>, unsigned> &OperandMap, unsigned MaxNumOperands, unsigned NumOperandNames) { OS << "LLVM_READONLY OpName getOperandIdxName(uint16_t Opcode, int16_t Idx) " diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn index 5efc153..51911d7 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn @@ -47,7 +47,6 @@ static_library("Clang") { "ClangASTImporter.cpp", "ClangASTMetadata.cpp", "ClangASTSource.cpp", - "ClangDeclVendor.cpp", "ClangExpressionDeclMap.cpp", "ClangExpressionHelper.cpp", "ClangExpressionParser.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index 38ba466..df9ddf9 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -45,6 +45,7 @@ static_library("Support") { "ARMAttributeParser.cpp", "ARMBuildAttributes.cpp", "ARMWinEH.cpp", + "AllocToken.cpp", "Allocator.cpp", "AutoConvert.cpp", "BalancedPartitioning.cpp", diff --git 
a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index a7e2705..f883145 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -92,12 +92,12 @@ class ShellEnvironment(object): we maintain a dir stack for pushd/popd. """ - def __init__(self, cwd, env, umask=-1, ulimit={}): + def __init__(self, cwd, env, umask=-1, ulimit=None): self.cwd = cwd self.env = dict(env) self.umask = umask self.dirStack = [] - self.ulimit = ulimit + self.ulimit = ulimit if ulimit else {} def change_dir(self, newdir): if os.path.isabs(newdir): diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_reset.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_reset.txt new file mode 100644 index 0000000..011d6db --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_reset.txt @@ -0,0 +1,3 @@ +# RUN: %{python} %S/print_limits.py +# Fail the test so that we can assert on the output. +# RUN: not echo return diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py index e843277..9a8febd 100644 --- a/llvm/utils/lit/tests/shtest-ulimit.py +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -5,9 +5,9 @@ # as well. # UNSUPPORTED: system-windows, system-solaris -# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit | FileCheck %s +# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit --order=lexical | FileCheck %s -# CHECK: -- Testing: 2 tests{{.*}} +# CHECK: -- Testing: 3 tests{{.*}} # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}}) # CHECK: ulimit -n @@ -16,3 +16,6 @@ # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}}) # CHECK: ulimit -n 50 # CHECK: RLIMIT_NOFILE=50 + +# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_reset.txt ({{[^)]*}}) +# CHECK-NOT: RLIMIT_NOFILE=50 |
