247 files changed, 23203 insertions, 1276 deletions
diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml
index 951fc16..6303a11 100644
--- a/.github/workflows/premerge.yaml
+++ b/.github/workflows/premerge.yaml
@@ -193,7 +193,7 @@ jobs:
         uses: llvm/actions/install-ninja@main
       - name: Build and Test
         run: |
-          source <(git diff --name-only HEAD~2..HEAD | python3 .ci/compute_projects.py)
+          source <(git diff --name-only HEAD~1...HEAD | python3 .ci/compute_projects.py)
           if [[ "${projects_to_build}" == "" ]]; then
             echo "No projects to build"
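A note on the range syntax, from standard git semantics rather than anything stated in the patch: `git diff A..B` compares the trees of the two commits directly, while `git diff A...B` compares B against merge-base(A, B), which is what limits the computed file list to the changes actually under test when HEAD is a CI merge commit.

    # A..B:  tree-to-tree diff between the two commits.
    git diff --name-only HEAD~2..HEAD
    # A...B: diff from merge-base(A, B) to B.
    git diff --name-only HEAD~1...HEAD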
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index b1e6ba2..86d09d7 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4457,6 +4457,79 @@ def CIR_TryOp : CIR_Op<"try",[
 // Atomic operations
 //===----------------------------------------------------------------------===//
 
+def CIR_AtomicFetchKind : CIR_I32EnumAttr<
+    "AtomicFetchKind", "Binary opcode for atomic fetch-and-update operations", [
+  I32EnumAttrCase<"Add", 0, "add">,
+  I32EnumAttrCase<"Sub", 1, "sub">,
+  I32EnumAttrCase<"And", 2, "and">,
+  I32EnumAttrCase<"Xor", 3, "xor">,
+  I32EnumAttrCase<"Or", 4, "or">,
+  I32EnumAttrCase<"Nand", 5, "nand">,
+  I32EnumAttrCase<"Max", 6, "max">,
+  I32EnumAttrCase<"Min", 7, "min">
+]>;
+
+def CIR_AtomicFetchOp : CIR_Op<"atomic.fetch", [
+  AllTypesMatch<["result", "val"]>,
+  TypesMatchWith<"type of 'val' must match the pointee type of 'ptr'",
+                 "ptr", "val", "mlir::cast<cir::PointerType>($_self).getPointee()">
+]> {
+  let summary = "Atomic fetch-and-update operation";
+  let description = [{
+    C/C++ atomic fetch-and-update operation. This operation implements the C/C++
+    builtin functions `__atomic_<binop>_fetch`, `__atomic_fetch_<binop>`, and
+    `__c11_atomic_fetch_<binop>`, where `<binop>` is one of the following binary
+    opcodes: `add`, `sub`, `and`, `xor`, `or`, `nand`, `max`, and `min`.
+
+    This operation takes 2 arguments: a pointer `ptr` and a value `val`. The
+    type of `val` must match the pointee type of `ptr`. If the binary operation
+    is `add`, `sub`, `max`, or `min`, the type of `val` may either be an integer
+    type or a floating-point type. Otherwise, `val` must be an integer.
+
+    This operation atomically loads the value from `ptr`, performs the binary
+    operation as indicated by `binop` on the loaded value and `val`, and stores
+    the result back to `ptr`. If the `fetch_first` flag is present, the result
+    of this operation is the old value loaded from `ptr` before the binary
+    operation. Otherwise, the result of this operation is the result of the
+    binary operation.
+
+    Example:
+      %res = cir.atomic.fetch add seq_cst %ptr, %val
+          : (!cir.ptr<!s32i>, !s32i) -> !s32i
+  }];
+  let results = (outs CIR_AnyIntOrFloatType:$result);
+  let arguments = (ins
+    Arg<CIR_PtrToIntOrFloatType, "", [MemRead, MemWrite]>:$ptr,
+    CIR_AnyIntOrFloatType:$val,
+    CIR_AtomicFetchKind:$binop,
+    Arg<CIR_MemOrder, "memory order">:$mem_order,
+    UnitAttr:$is_volatile,
+    UnitAttr:$fetch_first
+  );
+
+  let assemblyFormat = [{
+    $binop $mem_order
+    (`fetch_first` $fetch_first^)?
+    $ptr `,` $val
+    (`volatile` $is_volatile^)?
+    `:` `(` qualified(type($ptr)) `,` qualified(type($val)) `)`
+    `->` type($result) attr-dict
+  }];
+
+  let hasVerifier = 1;
+
+  let extraLLVMLoweringPatternDecl = [{
+    mlir::Value buildPostOp(cir::AtomicFetchOp op, OpAdaptor adaptor,
+                            mlir::ConversionPatternRewriter &rewriter,
+                            mlir::Value rmwVal, bool isInt) const;
+
+    mlir::Value buildMinMaxPostOp(cir::AtomicFetchOp op, OpAdaptor adaptor,
+                                  mlir::ConversionPatternRewriter &rewriter,
+                                  mlir::Value rmwVal, bool isInt,
+                                  bool isSigned) const;
+  }];
+}
+
 def CIR_AtomicXchgOp : CIR_Op<"atomic.xchg", [
   AllTypesMatch<["result", "val"]>,
   TypesMatchWith<"type of 'val' must match the pointee type of 'ptr'",
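To make the `fetch_first` distinction concrete, here is a minimal sketch using the standard GCC/Clang atomic builtins that CIR_AtomicFetchOp models; the snippet is illustrative, not part of the patch:

    #include <cstdio>

    int main() {
      int counter = 10;
      // __atomic_fetch_<binop> returns the OLD value (the fetch_first form).
      int old = __atomic_fetch_add(&counter, 5, __ATOMIC_SEQ_CST); // old == 10
      // __atomic_<binop>_fetch returns the NEW value (the binop result).
      int now = __atomic_add_fetch(&counter, 5, __ATOMIC_SEQ_CST); // now == 20
      std::printf("%d %d %d\n", old, now, counter);                // 10 20 20
    }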
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 01da626..598e826a 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -119,6 +119,7 @@ struct MissingFeatures {
   static bool opCallLandingPad() { return false; }
   static bool opCallContinueBlock() { return false; }
   static bool opCallChain() { return false; }
+  static bool opCallExceptionAttr() { return false; }
 
   // CXXNewExpr
   static bool exprNewNullCheck() { return false; }
@@ -218,6 +219,9 @@ struct MissingFeatures {
   static bool checkBitfieldClipping() { return false; }
   static bool cirgenABIInfo() { return false; }
   static bool cleanupAfterErrorDiags() { return false; }
+  static bool cleanupAppendInsts() { return false; }
+  static bool cleanupBranchThrough() { return false; }
+  static bool cleanupIndexAndBIAdjustment() { return false; }
   static bool cleanupsToDeactivate() { return false; }
   static bool constEmitterAggILE() { return false; }
   static bool constEmitterArrayILE() { return false; }
@@ -238,6 +242,7 @@ struct MissingFeatures {
   static bool deleteArray() { return false; }
   static bool devirtualizeMemberFunction() { return false; }
   static bool ehCleanupFlags() { return false; }
+  static bool ehCleanupHasPrebranchedFallthrough() { return false; }
   static bool ehCleanupScope() { return false; }
   static bool ehCleanupScopeRequiresEHCleanup() { return false; }
   static bool ehCleanupBranchFixups() { return false; }
@@ -256,6 +261,7 @@ struct MissingFeatures {
   static bool generateDebugInfo() { return false; }
   static bool globalViewIndices() { return false; }
   static bool globalViewIntLowering() { return false; }
+  static bool handleBuiltinICEArguments() { return false; }
   static bool hip() { return false; }
   static bool incrementProfileCounter() { return false; }
   static bool innermostEHScope() { return false; }
@@ -294,6 +300,7 @@ struct MissingFeatures {
   static bool setNonGC() { return false; }
   static bool setObjCGCLValueClass() { return false; }
   static bool setTargetAttributes() { return false; }
+  static bool simplifyCleanupEntry() { return false; }
   static bool sourceLanguageCases() { return false; }
   static bool stackBase() { return false; }
   static bool stackSaveOp() { return false; }
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index cb21335..87b96c2 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10021,7 +10021,7 @@ public:
   public:
     DeferDiagsRAII(Sema &S, bool DeferDiags)
         : S(S), SavedDeferDiags(S.DeferDiags) {
-      S.DeferDiags = DeferDiags;
+      S.DeferDiags = SavedDeferDiags || DeferDiags;
     }
     ~DeferDiagsRAII() { S.DeferDiags = SavedDeferDiags; }
   };
diff --git a/clang/include/clang/Sema/SemaBase.h b/clang/include/clang/Sema/SemaBase.h
index 550f530..8e43b0b 100644
--- a/clang/include/clang/Sema/SemaBase.h
+++ b/clang/include/clang/Sema/SemaBase.h
@@ -212,16 +212,13 @@ public:
   };
 
   /// Emit a diagnostic.
-  SemaDiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID,
-                             bool DeferHint = false);
+  SemaDiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID);
 
   /// Emit a partial diagnostic.
-  SemaDiagnosticBuilder Diag(SourceLocation Loc, const PartialDiagnostic &PD,
-                             bool DeferHint = false);
+  SemaDiagnosticBuilder Diag(SourceLocation Loc, const PartialDiagnostic &PD);
 
   /// Emit a compatibility diagnostic.
-  SemaDiagnosticBuilder DiagCompat(SourceLocation Loc, unsigned CompatDiagId,
-                                   bool DeferHint = false);
+  SemaDiagnosticBuilder DiagCompat(SourceLocation Loc, unsigned CompatDiagId);
 
   /// Build a partial diagnostic.
   PartialDiagnostic PDiag(unsigned DiagID = 0);
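The DeferDiagsRAII change above makes deferral sticky under nesting: an inner scope constructed with DeferDiags = false no longer temporarily un-defers diagnostics requested by an enclosing scope. A self-contained sketch of the before/after behavior; `State` and `Guard` are illustrative stand-ins, not Sema's types:

    #include <cassert>

    struct State { bool DeferDiags = false; };

    struct Guard {
      State &S;
      bool Saved;
      Guard(State &Sema, bool Defer) : S(Sema), Saved(Sema.DeferDiags) {
        // Old behavior was `S.DeferDiags = Defer;`, so a nested Guard(S, false)
        // cleared deferral requested by an enclosing Guard.
        S.DeferDiags = Saved || Defer; // New: stays on once requested.
      }
      ~Guard() { S.DeferDiags = Saved; }
    };

    int main() {
      State S;
      Guard Outer(S, /*Defer=*/true);
      {
        Guard Inner(S, /*Defer=*/false);
        assert(S.DeferDiags); // With the fix, still deferring here.
      }
      assert(S.DeferDiags);
    }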
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index f7731f0..6b98927 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -4841,46 +4841,39 @@ Compiler<Emitter>::visitVarDecl(const VarDecl *VD, const Expr *Init,
       return !NeedsOp || this->emitCheckDecl(VD, VD);
     };
 
-    auto initGlobal = [&](unsigned GlobalIndex) -> bool {
-      assert(Init);
-
-      if (VarT) {
-        if (!this->visit(Init))
-          return checkDecl() && false;
-
-        return checkDecl() && this->emitInitGlobal(*VarT, GlobalIndex, VD);
-      }
-
-      if (!checkDecl())
-        return false;
-
-      if (!this->emitGetPtrGlobal(GlobalIndex, Init))
-        return false;
-
-      if (!visitInitializer(Init))
-        return false;
-
-      return this->emitFinishInitGlobal(Init);
-    };
-
     DeclScope<Emitter> LocalScope(this, VD);
 
-    // We've already seen and initialized this global.
-    if (UnsignedOrNone GlobalIndex = P.getGlobal(VD)) {
+    UnsignedOrNone GlobalIndex = P.getGlobal(VD);
+    if (GlobalIndex) {
+      // We've already seen and initialized this global.
       if (P.getPtrGlobal(*GlobalIndex).isInitialized())
        return checkDecl();
-
       // The previous attempt at initialization might've been unsuccessful,
       // so let's try this one.
-      return !Init || (checkDecl() && initGlobal(*GlobalIndex));
+    } else if ((GlobalIndex = P.createGlobal(VD, Init))) {
+    } else {
+      return false;
     }
+    if (!Init)
+      return true;
 
-    UnsignedOrNone GlobalIndex = P.createGlobal(VD, Init);
+    if (!checkDecl())
+      return false;
 
-    if (!GlobalIndex)
+    if (VarT) {
+      if (!this->visit(Init))
+        return false;
+
+      return this->emitInitGlobal(*VarT, *GlobalIndex, VD);
+    }
+
+    if (!this->emitGetPtrGlobal(*GlobalIndex, Init))
+      return false;
+
+    if (!visitInitializer(Init))
       return false;
 
-    return !Init || (checkDecl() && initGlobal(*GlobalIndex));
+    return this->emitFinishInitGlobal(Init);
   }
 
   // Local variables.
   InitLinkScope<Emitter> ILS(this, InitLink::Decl(VD));
@@ -4890,36 +4883,37 @@ Compiler<Emitter>::visitVarDecl(const VarDecl *VD, const Expr *Init,
         VD, *VarT, VD->getType().isConstQualified(),
         VD->getType().isVolatileQualified(), nullptr, ScopeKind::Block,
         IsConstexprUnknown);
-    if (Init) {
-      // If this is a toplevel declaration, create a scope for the
-      // initializer.
-      if (Toplevel) {
-        LocalScope<Emitter> Scope(this);
-        if (!this->visit(Init))
-          return false;
-        return this->emitSetLocal(*VarT, Offset, VD) && Scope.destroyLocals();
-      }
-      if (!this->visit(Init))
-        return false;
-      return this->emitSetLocal(*VarT, Offset, VD);
-    }
-  } else {
-    if (UnsignedOrNone Offset = this->allocateLocal(
-            VD, VD->getType(), nullptr, ScopeKind::Block, IsConstexprUnknown)) {
-      if (!Init)
-        return true;
-      if (!this->emitGetPtrLocal(*Offset, Init))
-        return false;
+    if (!Init)
+      return true;
 
-      if (!visitInitializer(Init))
+    // If this is a toplevel declaration, create a scope for the
+    // initializer.
+    if (Toplevel) {
+      LocalScope<Emitter> Scope(this);
+      if (!this->visit(Init))
        return false;
-
-      return this->emitFinishInitPop(Init);
+      return this->emitSetLocal(*VarT, Offset, VD) && Scope.destroyLocals();
    }
-    return false;
+    if (!this->visit(Init))
+      return false;
+    return this->emitSetLocal(*VarT, Offset, VD);
  }
-  return true;
+  // Local composite variables.
+  if (UnsignedOrNone Offset = this->allocateLocal(
+          VD, VD->getType(), nullptr, ScopeKind::Block, IsConstexprUnknown)) {
+    if (!Init)
+      return true;
+
+    if (!this->emitGetPtrLocal(*Offset, Init))
+      return false;
+
+    if (!visitInitializer(Init))
+      return false;
+
+    return this->emitFinishInitPop(Init);
+  }
+  return false;
 }
 
 template <class Emitter>
diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp
index c88a470..f068be5 100644
--- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp
@@ -24,6 +24,7 @@
 #include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
 #include "clang/Analysis/FlowSensitive/MatchSwitch.h"
+#include "clang/Analysis/FlowSensitive/RecordOps.h"
 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "clang/Basic/LLVM.h"
@@ -95,6 +96,18 @@ static QualType getStatusOrValueType(ClassTemplateSpecializationDecl *TRD) {
   return TRD->getTemplateArgs().get(0).getAsType();
 }
 
+static auto ofClassStatus() {
+  using namespace ::clang::ast_matchers;  // NOLINT: Too many names
+  return ofClass(hasName("::absl::Status"));
+}
+
+static auto isStatusMemberCallWithName(llvm::StringRef member_name) {
+  using namespace ::clang::ast_matchers;  // NOLINT: Too many names
+  return cxxMemberCallExpr(
+      on(expr(unless(cxxThisExpr()))),
+      callee(cxxMethodDecl(hasName(member_name), ofClassStatus())));
+}
+
 static auto isStatusOrMemberCallWithName(llvm::StringRef member_name) {
   using namespace ::clang::ast_matchers;  // NOLINT: Too many names
   return cxxMemberCallExpr(
@@ -244,6 +257,61 @@ static void transferStatusOrOkCall(const CXXMemberCallExpr *Expr,
   State.Env.setValue(*Expr, OkVal);
 }
 
+static void transferStatusCall(const CXXMemberCallExpr *Expr,
+                               const MatchFinder::MatchResult &,
+                               LatticeTransferState &State) {
+  RecordStorageLocation *StatusOrLoc =
+      getImplicitObjectLocation(*Expr, State.Env);
+  if (StatusOrLoc == nullptr)
+    return;
+
+  RecordStorageLocation &StatusLoc = locForStatus(*StatusOrLoc);
+
+  if (State.Env.getValue(locForOk(StatusLoc)) == nullptr)
+    initializeStatusOr(*StatusOrLoc, State.Env);
+
+  if (Expr->isPRValue())
+    copyRecord(StatusLoc, State.Env.getResultObjectLocation(*Expr), State.Env);
+  else
+    State.Env.setStorageLocation(*Expr, StatusLoc);
+}
+
+static void transferStatusOkCall(const CXXMemberCallExpr *Expr,
+                                 const MatchFinder::MatchResult &,
+                                 LatticeTransferState &State) {
+  RecordStorageLocation *StatusLoc =
+      getImplicitObjectLocation(*Expr, State.Env);
+  if (StatusLoc == nullptr)
+    return;
+
+  if (Value *Val = State.Env.getValue(locForOk(*StatusLoc)))
+    State.Env.setValue(*Expr, *Val);
+}
+
+static void transferStatusUpdateCall(const CXXMemberCallExpr *Expr,
+                                     const MatchFinder::MatchResult &,
+                                     LatticeTransferState &State) {
+  // S.Update(OtherS) sets S to the error code of OtherS if it is OK,
+  // otherwise does nothing.
+  assert(Expr->getNumArgs() == 1);
+  auto *Arg = Expr->getArg(0);
+  RecordStorageLocation *ArgRecord =
+      Arg->isPRValue() ? &State.Env.getResultObjectLocation(*Arg)
+                       : State.Env.get<RecordStorageLocation>(*Arg);
+  RecordStorageLocation *ThisLoc = getImplicitObjectLocation(*Expr, State.Env);
+  if (ThisLoc == nullptr || ArgRecord == nullptr)
+    return;
+
+  auto &ThisOkVal = valForOk(*ThisLoc, State.Env);
+  auto &ArgOkVal = valForOk(*ArgRecord, State.Env);
+  auto &A = State.Env.arena();
+  auto &NewVal = State.Env.makeAtomicBoolValue();
+  State.Env.assume(A.makeImplies(A.makeNot(ThisOkVal.formula()),
+                                 A.makeNot(NewVal.formula())));
+  State.Env.assume(A.makeImplies(NewVal.formula(), ArgOkVal.formula()));
+  State.Env.setValue(locForOk(*ThisLoc), NewVal);
+}
+
 CFGMatchSwitch<LatticeTransferState>
 buildTransferMatchSwitch(ASTContext &Ctx,
                          CFGMatchSwitchBuilder<LatticeTransferState> Builder) {
@@ -251,6 +319,12 @@ buildTransferMatchSwitch(ASTContext &Ctx,
   return std::move(Builder)
       .CaseOfCFGStmt<CXXMemberCallExpr>(isStatusOrMemberCallWithName("ok"),
                                         transferStatusOrOkCall)
+      .CaseOfCFGStmt<CXXMemberCallExpr>(isStatusOrMemberCallWithName("status"),
+                                        transferStatusCall)
+      .CaseOfCFGStmt<CXXMemberCallExpr>(isStatusMemberCallWithName("ok"),
+                                        transferStatusOkCall)
+      .CaseOfCFGStmt<CXXMemberCallExpr>(isStatusMemberCallWithName("Update"),
+                                        transferStatusUpdateCall)
       .Build();
 }
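For context, these are the usage patterns the new transfer functions model, sketched against the usual absl API (illustrative only, not code from the patch): `StatusOr::status()` hands back the wrapped `absl::Status`, `Status::ok()` reads its ok-ness, and `Status::Update(other)` overwrites an OK status with `other`, so the result can be OK only if both inputs were.

    #include "absl/status/status.h"
    #include "absl/status/statusor.h"

    int Demo(const absl::StatusOr<int> &Sor, absl::Status Pending) {
      // transferStatusCall / transferStatusOkCall: a check written as
      // Sor.status().ok() should count as a check of Sor itself.
      if (Sor.status().ok())
        return *Sor; // The model now treats this access as checked.

      // transferStatusUpdateCall: afterwards, Pending can be OK only if it
      // was OK before and the argument status was OK too.
      Pending.Update(Sor.status());
      return Pending.ok() ? 0 : -1;
    }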
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index b7e8bad..f39c698 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -222,6 +222,8 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple,
       return std::make_unique<OHOSTargetInfo<ARMleTargetInfo>>(Triple, Opts);
     case llvm::Triple::FreeBSD:
       return std::make_unique<FreeBSDTargetInfo<ARMleTargetInfo>>(Triple, Opts);
+    case llvm::Triple::Fuchsia:
+      return std::make_unique<FuchsiaTargetInfo<ARMleTargetInfo>>(Triple, Opts);
     case llvm::Triple::NetBSD:
       return std::make_unique<NetBSDTargetInfo<ARMleTargetInfo>>(Triple, Opts);
     case llvm::Triple::OpenBSD:
@@ -254,6 +256,8 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple,
       return std::make_unique<AppleMachOARMTargetInfo>(Triple, Opts);
 
     switch (os) {
+    case llvm::Triple::Fuchsia:
+      return std::make_unique<FuchsiaTargetInfo<ARMbeTargetInfo>>(Triple, Opts);
     case llvm::Triple::Linux:
       return std::make_unique<LinuxTargetInfo<ARMbeTargetInfo>>(Triple, Opts);
     case llvm::Triple::NetBSD:
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 3de17d2..d00a3a4 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -260,6 +260,7 @@ ARMTargetInfo::ARMTargetInfo(const llvm::Triple &Triple,
     : TargetInfo(Triple), FPMath(FP_Default), IsAAPCS(true), LDREX(0),
       HW_FP(0) {
   bool IsFreeBSD = Triple.isOSFreeBSD();
+  bool IsFuchsia = Triple.isOSFuchsia();
   bool IsOpenBSD = Triple.isOSOpenBSD();
   bool IsNetBSD = Triple.isOSNetBSD();
   bool IsHaiku = Triple.isOSHaiku();
@@ -332,7 +333,7 @@ ARMTargetInfo::ARMTargetInfo(const llvm::Triple &Triple,
     default:
       if (IsNetBSD)
         setABI("apcs-gnu");
-      else if (IsFreeBSD || IsOpenBSD || IsHaiku || IsOHOS)
+      else if (IsFreeBSD || IsFuchsia || IsOpenBSD || IsHaiku || IsOHOS)
         setABI("aapcs-linux");
       else
         setABI("aapcs");
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 04da4e6..685925b 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -192,8 +192,11 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__riscv_muldiv");
   }
 
-  if (ISAInfo->hasExtension("a")) {
+  // The "a" extension is composed of "zalrsc" and "zaamo"
+  if (ISAInfo->hasExtension("a"))
     Builder.defineMacro("__riscv_atomic");
+
+  if (ISAInfo->hasExtension("zalrsc")) {
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index d8b0e64..85fa4cc 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -195,7 +195,8 @@ public:
   void setMaxAtomicWidth() override {
     MaxAtomicPromoteWidth = 128;
 
-    if (ISAInfo->hasExtension("a"))
+    // "a" implies "zalrsc" which is sufficient to inline atomics
+    if (ISAInfo->hasExtension("zalrsc"))
       MaxAtomicInlineWidth = 32;
   }
 };
@@ -225,7 +226,8 @@ public:
   void setMaxAtomicWidth() override {
     MaxAtomicPromoteWidth = 128;
 
-    if (ISAInfo->hasExtension("a"))
+    // "a" implies "zalrsc" which is sufficient to inline atomics
+    if (ISAInfo->hasExtension("zalrsc"))
       MaxAtomicInlineWidth = 64;
   }
 };
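The practical effect of keying the `__GCC_HAVE_SYNC_COMPARE_AND_SWAP_*` macros and `MaxAtomicInlineWidth` on `zalrsc` rather than the full `a` extension, as a hedged sketch (the `-march` spelling below is the usual clang one and is an assumption, not something stated in the patch): a core with only the LR/SC half of the atomics extension can still inline compare-and-swap loops instead of calling into libatomic.

    // e.g. clang --target=riscv32 -march=rv32i_zalrsc -O2 -c claim.cpp
    // With zalrsc available this can lower to an inline lr.w/sc.w retry loop
    // rather than a library call such as __atomic_compare_exchange_4.
    bool TryClaim(int *Flag) {
      int Expected = 0;
      return __atomic_compare_exchange_n(Flag, &Expected, /*Desired=*/1,
                                         /*Weak=*/false, __ATOMIC_ACQUIRE,
                                         __ATOMIC_RELAXED);
    }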
diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
index 67ca60c..7db6e28 100644
--- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
@@ -346,6 +346,8 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
   CIRGenBuilderTy &builder = cgf.getBuilder();
   mlir::Location loc = cgf.getLoc(expr->getSourceRange());
   auto orderAttr = cir::MemOrderAttr::get(builder.getContext(), order);
+  cir::AtomicFetchKindAttr fetchAttr;
+  bool fetchFirst = true;
 
   switch (expr->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
@@ -407,6 +409,86 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
     opName = cir::AtomicXchgOp::getOperationName();
     break;
 
+  case AtomicExpr::AO__atomic_add_fetch:
+    fetchFirst = false;
+    [[fallthrough]];
+  case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__atomic_fetch_add:
+    opName = cir::AtomicFetchOp::getOperationName();
+    fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+                                              cir::AtomicFetchKind::Add);
+    break;
+
+  case AtomicExpr::AO__atomic_sub_fetch:
+    fetchFirst = false;
+    [[fallthrough]];
+  case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__atomic_fetch_sub:
+    opName = cir::AtomicFetchOp::getOperationName();
+    fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+                                              cir::AtomicFetchKind::Sub);
+    break;
+
+  case AtomicExpr::AO__atomic_min_fetch:
+    fetchFirst = false;
+    [[fallthrough]];
+  case AtomicExpr::AO__c11_atomic_fetch_min:
+  case AtomicExpr::AO__atomic_fetch_min:
+    opName = cir::AtomicFetchOp::getOperationName();
+    fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+                                              cir::AtomicFetchKind::Min);
+    break;
+
+  case AtomicExpr::AO__atomic_max_fetch:
+    fetchFirst = false;
+    [[fallthrough]];
+  case AtomicExpr::AO__c11_atomic_fetch_max:
+  case AtomicExpr::AO__atomic_fetch_max:
+    opName = cir::AtomicFetchOp::getOperationName();
+    fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+                                              cir::AtomicFetchKind::Max);
+    break;
+
+  case AtomicExpr::AO__atomic_and_fetch:
+    fetchFirst = false;
+    [[fallthrough]];
+  case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__atomic_fetch_and:
+    opName = cir::AtomicFetchOp::getOperationName();
+    fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+                                              cir::AtomicFetchKind::And);
+    break;
+
+  case AtomicExpr::AO__atomic_or_fetch:
+    fetchFirst = false;
+    [[fallthrough]];
+  case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__atomic_fetch_or:
+    opName = cir::AtomicFetchOp::getOperationName();
+    fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+                                              cir::AtomicFetchKind::Or);
+    break;
+
+  case AtomicExpr::AO__atomic_xor_fetch:
+    fetchFirst = false;
+    [[fallthrough]];
+  case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__atomic_fetch_xor:
+    opName = cir::AtomicFetchOp::getOperationName();
+    fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+                                              cir::AtomicFetchKind::Xor);
+    break;
+
+  case AtomicExpr::AO__atomic_nand_fetch:
+    fetchFirst = false;
+    [[fallthrough]];
+  case AtomicExpr::AO__c11_atomic_fetch_nand:
+  case AtomicExpr::AO__atomic_fetch_nand:
+    opName = cir::AtomicFetchOp::getOperationName();
+    fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+                                              cir::AtomicFetchKind::Nand);
+    break;
+
   case AtomicExpr::AO__atomic_test_and_set: {
     auto op = cir::AtomicTestAndSetOp::create(
         builder, loc, ptr.getPointer(), order,
@@ -450,74 +532,50 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
 
   case AtomicExpr::AO__scoped_atomic_exchange_n:
   case AtomicExpr::AO__scoped_atomic_exchange:
-  case AtomicExpr::AO__atomic_add_fetch:
   case AtomicExpr::AO__scoped_atomic_add_fetch:
-  case AtomicExpr::AO__c11_atomic_fetch_add:
   case AtomicExpr::AO__hip_atomic_fetch_add:
   case AtomicExpr::AO__opencl_atomic_fetch_add:
-  case AtomicExpr::AO__atomic_fetch_add:
   case AtomicExpr::AO__scoped_atomic_fetch_add:
-  case AtomicExpr::AO__atomic_sub_fetch:
   case AtomicExpr::AO__scoped_atomic_sub_fetch:
-  case AtomicExpr::AO__c11_atomic_fetch_sub:
   case AtomicExpr::AO__hip_atomic_fetch_sub:
   case AtomicExpr::AO__opencl_atomic_fetch_sub:
-  case AtomicExpr::AO__atomic_fetch_sub:
   case AtomicExpr::AO__scoped_atomic_fetch_sub:
-  case AtomicExpr::AO__atomic_min_fetch:
   case AtomicExpr::AO__scoped_atomic_min_fetch:
-  case AtomicExpr::AO__c11_atomic_fetch_min:
   case AtomicExpr::AO__hip_atomic_fetch_min:
   case AtomicExpr::AO__opencl_atomic_fetch_min:
-  case AtomicExpr::AO__atomic_fetch_min:
   case AtomicExpr::AO__scoped_atomic_fetch_min:
-  case AtomicExpr::AO__atomic_max_fetch:
   case AtomicExpr::AO__scoped_atomic_max_fetch:
-  case AtomicExpr::AO__c11_atomic_fetch_max:
   case AtomicExpr::AO__hip_atomic_fetch_max:
   case AtomicExpr::AO__opencl_atomic_fetch_max:
-  case AtomicExpr::AO__atomic_fetch_max:
   case AtomicExpr::AO__scoped_atomic_fetch_max:
-  case AtomicExpr::AO__atomic_and_fetch:
   case AtomicExpr::AO__scoped_atomic_and_fetch:
-  case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__hip_atomic_fetch_and:
   case AtomicExpr::AO__opencl_atomic_fetch_and:
-  case AtomicExpr::AO__atomic_fetch_and:
   case AtomicExpr::AO__scoped_atomic_fetch_and:
-  case AtomicExpr::AO__atomic_or_fetch:
   case AtomicExpr::AO__scoped_atomic_or_fetch:
-  case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__hip_atomic_fetch_or:
   case AtomicExpr::AO__opencl_atomic_fetch_or:
-  case AtomicExpr::AO__atomic_fetch_or:
   case AtomicExpr::AO__scoped_atomic_fetch_or:
-  case AtomicExpr::AO__atomic_xor_fetch:
   case AtomicExpr::AO__scoped_atomic_xor_fetch:
-  case AtomicExpr::AO__c11_atomic_fetch_xor:
   case AtomicExpr::AO__hip_atomic_fetch_xor:
   case AtomicExpr::AO__opencl_atomic_fetch_xor:
-  case AtomicExpr::AO__atomic_fetch_xor:
   case AtomicExpr::AO__scoped_atomic_fetch_xor:
-  case AtomicExpr::AO__atomic_nand_fetch:
   case AtomicExpr::AO__scoped_atomic_nand_fetch:
-  case AtomicExpr::AO__c11_atomic_fetch_nand:
-  case AtomicExpr::AO__atomic_fetch_nand:
   case AtomicExpr::AO__scoped_atomic_fetch_nand:
     cgf.cgm.errorNYI(expr->getSourceRange(), "emitAtomicOp: expr op NYI");
     return;
@@ -531,9 +589,13 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
 
   mlir::Operation *rmwOp = builder.create(loc, builder.getStringAttr(opName),
                                           atomicOperands, atomicResTys);
+  if (fetchAttr)
+    rmwOp->setAttr("binop", fetchAttr);
   rmwOp->setAttr("mem_order", orderAttr);
   if (expr->isVolatile())
     rmwOp->setAttr("is_volatile", builder.getUnitAttr());
+  if (fetchFirst && opName == cir::AtomicFetchOp::getOperationName())
+    rmwOp->setAttr("fetch_first", builder.getUnitAttr());
 
   mlir::Value result = rmwOp->getResult(0);
   builder.createStore(loc, result, dest);
@@ -629,8 +691,41 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) {
     isWeakExpr = e->getWeak();
     break;
 
+  case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__c11_atomic_fetch_sub:
+    if (memTy->isPointerType()) {
+      cgm.errorNYI(e->getSourceRange(),
+                   "atomic fetch-and-add and fetch-and-sub for pointers");
+      return RValue::get(nullptr);
+    }
+    [[fallthrough]];
+  case AtomicExpr::AO__atomic_fetch_add:
+  case AtomicExpr::AO__atomic_fetch_max:
+  case AtomicExpr::AO__atomic_fetch_min:
+  case AtomicExpr::AO__atomic_fetch_sub:
+  case AtomicExpr::AO__atomic_add_fetch:
+  case AtomicExpr::AO__atomic_max_fetch:
+  case AtomicExpr::AO__atomic_min_fetch:
+  case AtomicExpr::AO__atomic_sub_fetch:
+  case AtomicExpr::AO__c11_atomic_fetch_max:
+  case AtomicExpr::AO__c11_atomic_fetch_min:
+    shouldCastToIntPtrTy = !memTy->isFloatingType();
+    [[fallthrough]];
+
+  case AtomicExpr::AO__atomic_fetch_and:
+  case AtomicExpr::AO__atomic_fetch_nand:
+  case AtomicExpr::AO__atomic_fetch_or:
+  case AtomicExpr::AO__atomic_fetch_xor:
+  case AtomicExpr::AO__atomic_and_fetch:
+  case AtomicExpr::AO__atomic_nand_fetch:
+  case AtomicExpr::AO__atomic_or_fetch:
+  case AtomicExpr::AO__atomic_xor_fetch:
   case AtomicExpr::AO__atomic_exchange_n:
   case AtomicExpr::AO__atomic_store_n:
+  case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__c11_atomic_fetch_nand:
+  case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__c11_atomic_fetch_xor:
   case AtomicExpr::AO__c11_atomic_exchange:
   case AtomicExpr::AO__c11_atomic_store:
     val1 = emitValToTemp(*this, e->getVal1());
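The `shouldCastToIntPtrTy = !memTy->isFloatingType()` line above is what lets floating-point operands reach `cir.atomic.fetch` unconverted for the arithmetic opcodes, matching the op definition's rule that only `add`, `sub`, `max`, and `min` accept floats. A small usage sketch with the standard builtins (clang accepts the floating-point form of `__atomic_fetch_add`; the bitwise fetch builtins remain integer-only):

    float AddSample(float *Sum, float Delta) {
      // Floating point is allowed for add/sub/min/max; returns the value
      // before the addition.
      return __atomic_fetch_add(Sum, Delta, __ATOMIC_SEQ_CST);
    }

    unsigned SetBits(unsigned *Mask, unsigned Bits) {
      // The bitwise opcodes (and/or/xor/nand) take integers only.
      return __atomic_fetch_or(Mask, Bits, __ATOMIC_SEQ_CST);
    }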
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index ea31871..798e9d9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -463,12 +463,107 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     return emitLibraryCall(*this, fd, e,
                            cgm.getBuiltinLibFunction(fd, builtinID));
 
+  // Some target-specific builtins can have aggregate return values, e.g.
+  // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
+  // returnValue to be non-null, so that the target-specific emission code can
+  // always just emit into it.
+  cir::TypeEvaluationKind evalKind = getEvaluationKind(e->getType());
+  if (evalKind == cir::TEK_Aggregate && returnValue.isNull()) {
+    cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin");
+    return getUndefRValue(e->getType());
+  }
+
+  // Now see if we can emit a target-specific builtin.
+  if (mlir::Value v = emitTargetBuiltinExpr(builtinID, e, returnValue)) {
+    switch (evalKind) {
+    case cir::TEK_Scalar:
+      if (mlir::isa<cir::VoidType>(v.getType()))
+        return RValue::get(nullptr);
+      return RValue::get(v);
+    case cir::TEK_Aggregate:
+      cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin");
+      return getUndefRValue(e->getType());
+    case cir::TEK_Complex:
+      llvm_unreachable("No current target builtin returns complex");
+    }
+    llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
+  }
+
   cgm.errorNYI(e->getSourceRange(),
                std::string("unimplemented builtin call: ") +
                    getContext().BuiltinInfo.getName(builtinID));
   return getUndefRValue(e->getType());
 }
 
+static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf,
+                                             unsigned builtinID,
+                                             const CallExpr *e,
+                                             ReturnValueSlot &returnValue,
+                                             llvm::Triple::ArchType arch) {
+  // When compiling in HipStdPar mode we have to be conservative in rejecting
+  // target specific features in the FE, and defer the possible error to the
+  // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
+  // referenced by an accelerator executable function, we emit an error.
+  // Returning nullptr here leads to the builtin being handled in
+  // EmitStdParUnsupportedBuiltin.
+  if (cgf->getLangOpts().HIPStdPar && cgf->getLangOpts().CUDAIsDevice &&
+      arch != cgf->getTarget().getTriple().getArch())
+    return {};
+
+  switch (arch) {
+  case llvm::Triple::arm:
+  case llvm::Triple::armeb:
+  case llvm::Triple::thumb:
+  case llvm::Triple::thumbeb:
+  case llvm::Triple::aarch64:
+  case llvm::Triple::aarch64_32:
+  case llvm::Triple::aarch64_be:
+  case llvm::Triple::bpfeb:
+  case llvm::Triple::bpfel:
+    // These are actually NYI, but that will be reported by emitBuiltinExpr.
+    // At this point, we don't even know that the builtin is target-specific.
+    return nullptr;
+
+  case llvm::Triple::x86:
+  case llvm::Triple::x86_64:
+    return cgf->emitX86BuiltinExpr(builtinID, e);
+
+  case llvm::Triple::ppc:
+  case llvm::Triple::ppcle:
+  case llvm::Triple::ppc64:
+  case llvm::Triple::ppc64le:
+  case llvm::Triple::r600:
+  case llvm::Triple::amdgcn:
+  case llvm::Triple::systemz:
+  case llvm::Triple::nvptx:
+  case llvm::Triple::nvptx64:
+  case llvm::Triple::wasm32:
+  case llvm::Triple::wasm64:
+  case llvm::Triple::hexagon:
+  case llvm::Triple::riscv32:
+  case llvm::Triple::riscv64:
+    // These are actually NYI, but that will be reported by emitBuiltinExpr.
+    // At this point, we don't even know that the builtin is target-specific.
+    return {};
+  default:
+    return {};
+  }
+}
+
+mlir::Value
+CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, const CallExpr *e,
+                                      ReturnValueSlot &returnValue) {
+  if (getContext().BuiltinInfo.isAuxBuiltinID(builtinID)) {
+    assert(getContext().getAuxTargetInfo() && "Missing aux target info");
+    return emitTargetArchBuiltinExpr(
+        this, getContext().BuiltinInfo.getAuxBuiltinID(builtinID), e,
+        returnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
+  }
+
+  return emitTargetArchBuiltinExpr(this, builtinID, e, returnValue,
+                                   getTarget().getTriple().getArch());
+}
+
 /// Given a builtin id for a function like "__builtin_fabsf", return a Function*
 /// for "fabsf".
 cir::FuncOp CIRGenModule::getBuiltinLibFunction(const FunctionDecl *fd,
+ return {}; + default: + return {}; + } +} + +mlir::Value +CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, const CallExpr *e, + ReturnValueSlot &returnValue) { + if (getContext().BuiltinInfo.isAuxBuiltinID(builtinID)) { + assert(getContext().getAuxTargetInfo() && "Missing aux target info"); + return emitTargetArchBuiltinExpr( + this, getContext().BuiltinInfo.getAuxBuiltinID(builtinID), e, + returnValue, getContext().getAuxTargetInfo()->getTriple().getArch()); + } + + return emitTargetArchBuiltinExpr(this, builtinID, e, returnValue, + getTarget().getTriple().getArch()); +} + /// Given a builtin id for a function like "__builtin_fabsf", return a Function* /// for "fabsf". cir::FuncOp CIRGenModule::getBuiltinLibFunction(const FunctionDecl *fd, diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp new file mode 100644 index 0000000..3c9c7ec --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -0,0 +1,814 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit x86/x86_64 Builtin calls as CIR or a function +// call to be later resolved. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/TargetBuiltins.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/IR/IntrinsicsX86.h" + +using namespace clang; +using namespace clang::CIRGen; + +mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, + const CallExpr *e) { + if (builtinID == Builtin::BI__builtin_cpu_is) { + cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is"); + return {}; + } + if (builtinID == Builtin::BI__builtin_cpu_supports) { + cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports"); + return {}; + } + if (builtinID == Builtin::BI__builtin_cpu_init) { + cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init"); + return {}; + } + + // Handle MSVC intrinsics before argument evaluation to prevent double + // evaluation. + assert(!cir::MissingFeatures::msvcBuiltins()); + + // Find out if any arguments are required to be integer constant expressions. 
+ assert(!cir::MissingFeatures::handleBuiltinICEArguments()); + + switch (builtinID) { + default: + return {}; + case X86::BI_mm_prefetch: + case X86::BI_mm_clflush: + case X86::BI_mm_lfence: + case X86::BI_mm_pause: + case X86::BI_mm_mfence: + case X86::BI_mm_sfence: + case X86::BI__rdtsc: + case X86::BI__builtin_ia32_rdtscp: + case X86::BI__builtin_ia32_lzcnt_u16: + case X86::BI__builtin_ia32_lzcnt_u32: + case X86::BI__builtin_ia32_lzcnt_u64: + case X86::BI__builtin_ia32_tzcnt_u16: + case X86::BI__builtin_ia32_tzcnt_u32: + case X86::BI__builtin_ia32_tzcnt_u64: + case X86::BI__builtin_ia32_undef128: + case X86::BI__builtin_ia32_undef256: + case X86::BI__builtin_ia32_undef512: + case X86::BI__builtin_ia32_vec_ext_v4hi: + case X86::BI__builtin_ia32_vec_ext_v16qi: + case X86::BI__builtin_ia32_vec_ext_v8hi: + case X86::BI__builtin_ia32_vec_ext_v4si: + case X86::BI__builtin_ia32_vec_ext_v4sf: + case X86::BI__builtin_ia32_vec_ext_v2di: + case X86::BI__builtin_ia32_vec_ext_v32qi: + case X86::BI__builtin_ia32_vec_ext_v16hi: + case X86::BI__builtin_ia32_vec_ext_v8si: + case X86::BI__builtin_ia32_vec_ext_v4di: + case X86::BI__builtin_ia32_vec_set_v4hi: + case X86::BI__builtin_ia32_vec_set_v16qi: + case X86::BI__builtin_ia32_vec_set_v8hi: + case X86::BI__builtin_ia32_vec_set_v4si: + case X86::BI__builtin_ia32_vec_set_v2di: + case X86::BI__builtin_ia32_vec_set_v32qi: + case X86::BI__builtin_ia32_vec_set_v16hi: + case X86::BI__builtin_ia32_vec_set_v8si: + case X86::BI__builtin_ia32_vec_set_v4di: + case X86::BI_mm_setcsr: + case X86::BI__builtin_ia32_ldmxcsr: + case X86::BI_mm_getcsr: + case X86::BI__builtin_ia32_stmxcsr: + case X86::BI__builtin_ia32_xsave: + case X86::BI__builtin_ia32_xsave64: + case X86::BI__builtin_ia32_xrstor: + case X86::BI__builtin_ia32_xrstor64: + case X86::BI__builtin_ia32_xsaveopt: + case X86::BI__builtin_ia32_xsaveopt64: + case X86::BI__builtin_ia32_xrstors: + case X86::BI__builtin_ia32_xrstors64: + case X86::BI__builtin_ia32_xsavec: + case X86::BI__builtin_ia32_xsavec64: + case X86::BI__builtin_ia32_xsaves: + case X86::BI__builtin_ia32_xsaves64: + case X86::BI__builtin_ia32_xsetbv: + case X86::BI_xsetbv: + case X86::BI__builtin_ia32_xgetbv: + case X86::BI_xgetbv: + case X86::BI__builtin_ia32_storedqudi128_mask: + case X86::BI__builtin_ia32_storedqusi128_mask: + case X86::BI__builtin_ia32_storedquhi128_mask: + case X86::BI__builtin_ia32_storedquqi128_mask: + case X86::BI__builtin_ia32_storeupd128_mask: + case X86::BI__builtin_ia32_storeups128_mask: + case X86::BI__builtin_ia32_storedqudi256_mask: + case X86::BI__builtin_ia32_storedqusi256_mask: + case X86::BI__builtin_ia32_storedquhi256_mask: + case X86::BI__builtin_ia32_storedquqi256_mask: + case X86::BI__builtin_ia32_storeupd256_mask: + case X86::BI__builtin_ia32_storeups256_mask: + case X86::BI__builtin_ia32_storedqudi512_mask: + case X86::BI__builtin_ia32_storedqusi512_mask: + case X86::BI__builtin_ia32_storedquhi512_mask: + case X86::BI__builtin_ia32_storedquqi512_mask: + case X86::BI__builtin_ia32_storeupd512_mask: + case X86::BI__builtin_ia32_storeups512_mask: + case X86::BI__builtin_ia32_storesbf16128_mask: + case X86::BI__builtin_ia32_storesh128_mask: + case X86::BI__builtin_ia32_storess128_mask: + case X86::BI__builtin_ia32_storesd128_mask: + case X86::BI__builtin_ia32_cvtmask2b128: + case X86::BI__builtin_ia32_cvtmask2b256: + case X86::BI__builtin_ia32_cvtmask2b512: + case X86::BI__builtin_ia32_cvtmask2w128: + case X86::BI__builtin_ia32_cvtmask2w256: + case X86::BI__builtin_ia32_cvtmask2w512: + case 
X86::BI__builtin_ia32_cvtmask2d128: + case X86::BI__builtin_ia32_cvtmask2d256: + case X86::BI__builtin_ia32_cvtmask2d512: + case X86::BI__builtin_ia32_cvtmask2q128: + case X86::BI__builtin_ia32_cvtmask2q256: + case X86::BI__builtin_ia32_cvtmask2q512: + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: + case X86::BI__builtin_ia32_cvtdq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2pd512_mask: + case X86::BI__builtin_ia32_vcvtw2ph512_mask: + case X86::BI__builtin_ia32_vcvtdq2ph512_mask: + case X86::BI__builtin_ia32_vcvtqq2ph512_mask: + case X86::BI__builtin_ia32_cvtudq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2pd512_mask: + case X86::BI__builtin_ia32_vcvtuw2ph512_mask: + case X86::BI__builtin_ia32_vcvtudq2ph512_mask: + case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: + case X86::BI__builtin_ia32_vfmaddss3: + case X86::BI__builtin_ia32_vfmaddsd3: + case X86::BI__builtin_ia32_vfmaddsh3_mask: + case X86::BI__builtin_ia32_vfmaddss3_mask: + case X86::BI__builtin_ia32_vfmaddsd3_mask: + case X86::BI__builtin_ia32_vfmaddss: + case X86::BI__builtin_ia32_vfmaddsd: + case X86::BI__builtin_ia32_vfmaddsh3_maskz: + case X86::BI__builtin_ia32_vfmaddss3_maskz: + case X86::BI__builtin_ia32_vfmaddsd3_maskz: + case X86::BI__builtin_ia32_vfmaddsh3_mask3: + case X86::BI__builtin_ia32_vfmaddss3_mask3: + case X86::BI__builtin_ia32_vfmaddsd3_mask3: + case X86::BI__builtin_ia32_vfmsubsh3_mask3: + case X86::BI__builtin_ia32_vfmsubss3_mask3: + case X86::BI__builtin_ia32_vfmsubsd3_mask3: + case X86::BI__builtin_ia32_vfmaddph512_mask: + case X86::BI__builtin_ia32_vfmaddph512_maskz: + case X86::BI__builtin_ia32_vfmaddph512_mask3: + case X86::BI__builtin_ia32_vfmaddps512_mask: + case X86::BI__builtin_ia32_vfmaddps512_maskz: + case X86::BI__builtin_ia32_vfmaddps512_mask3: + case X86::BI__builtin_ia32_vfmsubps512_mask3: + case X86::BI__builtin_ia32_vfmaddpd512_mask: + case X86::BI__builtin_ia32_vfmaddpd512_maskz: + case X86::BI__builtin_ia32_vfmaddpd512_mask3: + case X86::BI__builtin_ia32_vfmsubpd512_mask3: + case X86::BI__builtin_ia32_vfmsubph512_mask3: + case X86::BI__builtin_ia32_vfmaddsubph512_mask: + case X86::BI__builtin_ia32_vfmaddsubph512_maskz: + case X86::BI__builtin_ia32_vfmaddsubph512_mask3: + case X86::BI__builtin_ia32_vfmsubaddph512_mask3: + case X86::BI__builtin_ia32_vfmaddsubps512_mask: + case X86::BI__builtin_ia32_vfmaddsubps512_maskz: + case X86::BI__builtin_ia32_vfmaddsubps512_mask3: + case X86::BI__builtin_ia32_vfmsubaddps512_mask3: + case X86::BI__builtin_ia32_vfmaddsubpd512_mask: + case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: + case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: + case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: + case X86::BI__builtin_ia32_movdqa32store128_mask: + case X86::BI__builtin_ia32_movdqa64store128_mask: + case X86::BI__builtin_ia32_storeaps128_mask: + case X86::BI__builtin_ia32_storeapd128_mask: + case X86::BI__builtin_ia32_movdqa32store256_mask: + case X86::BI__builtin_ia32_movdqa64store256_mask: + case 
X86::BI__builtin_ia32_storeaps256_mask: + case X86::BI__builtin_ia32_storeapd256_mask: + case X86::BI__builtin_ia32_movdqa32store512_mask: + case X86::BI__builtin_ia32_movdqa64store512_mask: + case X86::BI__builtin_ia32_storeaps512_mask: + case X86::BI__builtin_ia32_storeapd512_mask: + case X86::BI__builtin_ia32_loadups128_mask: + case X86::BI__builtin_ia32_loadups256_mask: + case X86::BI__builtin_ia32_loadups512_mask: + case X86::BI__builtin_ia32_loadupd128_mask: + case X86::BI__builtin_ia32_loadupd256_mask: + case X86::BI__builtin_ia32_loadupd512_mask: + case X86::BI__builtin_ia32_loaddquqi128_mask: + case X86::BI__builtin_ia32_loaddquqi256_mask: + case X86::BI__builtin_ia32_loaddquqi512_mask: + case X86::BI__builtin_ia32_loaddquhi128_mask: + case X86::BI__builtin_ia32_loaddquhi256_mask: + case X86::BI__builtin_ia32_loaddquhi512_mask: + case X86::BI__builtin_ia32_loaddqusi128_mask: + case X86::BI__builtin_ia32_loaddqusi256_mask: + case X86::BI__builtin_ia32_loaddqusi512_mask: + case X86::BI__builtin_ia32_loaddqudi128_mask: + case X86::BI__builtin_ia32_loaddqudi256_mask: + case X86::BI__builtin_ia32_loaddqudi512_mask: + case X86::BI__builtin_ia32_loadsbf16128_mask: + case X86::BI__builtin_ia32_loadsh128_mask: + case X86::BI__builtin_ia32_loadss128_mask: + case X86::BI__builtin_ia32_loadsd128_mask: + case X86::BI__builtin_ia32_loadaps128_mask: + case X86::BI__builtin_ia32_loadaps256_mask: + case X86::BI__builtin_ia32_loadaps512_mask: + case X86::BI__builtin_ia32_loadapd128_mask: + case X86::BI__builtin_ia32_loadapd256_mask: + case X86::BI__builtin_ia32_loadapd512_mask: + case X86::BI__builtin_ia32_movdqa32load128_mask: + case X86::BI__builtin_ia32_movdqa32load256_mask: + case X86::BI__builtin_ia32_movdqa32load512_mask: + case X86::BI__builtin_ia32_movdqa64load128_mask: + case X86::BI__builtin_ia32_movdqa64load256_mask: + case X86::BI__builtin_ia32_movdqa64load512_mask: + case X86::BI__builtin_ia32_expandloaddf128_mask: + case X86::BI__builtin_ia32_expandloaddf256_mask: + case X86::BI__builtin_ia32_expandloaddf512_mask: + case X86::BI__builtin_ia32_expandloadsf128_mask: + case X86::BI__builtin_ia32_expandloadsf256_mask: + case X86::BI__builtin_ia32_expandloadsf512_mask: + case X86::BI__builtin_ia32_expandloaddi128_mask: + case X86::BI__builtin_ia32_expandloaddi256_mask: + case X86::BI__builtin_ia32_expandloaddi512_mask: + case X86::BI__builtin_ia32_expandloadsi128_mask: + case X86::BI__builtin_ia32_expandloadsi256_mask: + case X86::BI__builtin_ia32_expandloadsi512_mask: + case X86::BI__builtin_ia32_expandloadhi128_mask: + case X86::BI__builtin_ia32_expandloadhi256_mask: + case X86::BI__builtin_ia32_expandloadhi512_mask: + case X86::BI__builtin_ia32_expandloadqi128_mask: + case X86::BI__builtin_ia32_expandloadqi256_mask: + case X86::BI__builtin_ia32_expandloadqi512_mask: + case X86::BI__builtin_ia32_compressstoredf128_mask: + case X86::BI__builtin_ia32_compressstoredf256_mask: + case X86::BI__builtin_ia32_compressstoredf512_mask: + case X86::BI__builtin_ia32_compressstoresf128_mask: + case X86::BI__builtin_ia32_compressstoresf256_mask: + case X86::BI__builtin_ia32_compressstoresf512_mask: + case X86::BI__builtin_ia32_compressstoredi128_mask: + case X86::BI__builtin_ia32_compressstoredi256_mask: + case X86::BI__builtin_ia32_compressstoredi512_mask: + case X86::BI__builtin_ia32_compressstoresi128_mask: + case X86::BI__builtin_ia32_compressstoresi256_mask: + case X86::BI__builtin_ia32_compressstoresi512_mask: + case X86::BI__builtin_ia32_compressstorehi128_mask: + case 
X86::BI__builtin_ia32_compressstorehi256_mask: + case X86::BI__builtin_ia32_compressstorehi512_mask: + case X86::BI__builtin_ia32_compressstoreqi128_mask: + case X86::BI__builtin_ia32_compressstoreqi256_mask: + case X86::BI__builtin_ia32_compressstoreqi512_mask: + case X86::BI__builtin_ia32_expanddf128_mask: + case X86::BI__builtin_ia32_expanddf256_mask: + case X86::BI__builtin_ia32_expanddf512_mask: + case X86::BI__builtin_ia32_expandsf128_mask: + case X86::BI__builtin_ia32_expandsf256_mask: + case X86::BI__builtin_ia32_expandsf512_mask: + case X86::BI__builtin_ia32_expanddi128_mask: + case X86::BI__builtin_ia32_expanddi256_mask: + case X86::BI__builtin_ia32_expanddi512_mask: + case X86::BI__builtin_ia32_expandsi128_mask: + case X86::BI__builtin_ia32_expandsi256_mask: + case X86::BI__builtin_ia32_expandsi512_mask: + case X86::BI__builtin_ia32_expandhi128_mask: + case X86::BI__builtin_ia32_expandhi256_mask: + case X86::BI__builtin_ia32_expandhi512_mask: + case X86::BI__builtin_ia32_expandqi128_mask: + case X86::BI__builtin_ia32_expandqi256_mask: + case X86::BI__builtin_ia32_expandqi512_mask: + case X86::BI__builtin_ia32_compressdf128_mask: + case X86::BI__builtin_ia32_compressdf256_mask: + case X86::BI__builtin_ia32_compressdf512_mask: + case X86::BI__builtin_ia32_compresssf128_mask: + case X86::BI__builtin_ia32_compresssf256_mask: + case X86::BI__builtin_ia32_compresssf512_mask: + case X86::BI__builtin_ia32_compressdi128_mask: + case X86::BI__builtin_ia32_compressdi256_mask: + case X86::BI__builtin_ia32_compressdi512_mask: + case X86::BI__builtin_ia32_compresssi128_mask: + case X86::BI__builtin_ia32_compresssi256_mask: + case X86::BI__builtin_ia32_compresssi512_mask: + case X86::BI__builtin_ia32_compresshi128_mask: + case X86::BI__builtin_ia32_compresshi256_mask: + case X86::BI__builtin_ia32_compresshi512_mask: + case X86::BI__builtin_ia32_compressqi128_mask: + case X86::BI__builtin_ia32_compressqi256_mask: + case X86::BI__builtin_ia32_compressqi512_mask: + case X86::BI__builtin_ia32_gather3div2df: + case X86::BI__builtin_ia32_gather3div2di: + case X86::BI__builtin_ia32_gather3div4df: + case X86::BI__builtin_ia32_gather3div4di: + case X86::BI__builtin_ia32_gather3div4sf: + case X86::BI__builtin_ia32_gather3div4si: + case X86::BI__builtin_ia32_gather3div8sf: + case X86::BI__builtin_ia32_gather3div8si: + case X86::BI__builtin_ia32_gather3siv2df: + case X86::BI__builtin_ia32_gather3siv2di: + case X86::BI__builtin_ia32_gather3siv4df: + case X86::BI__builtin_ia32_gather3siv4di: + case X86::BI__builtin_ia32_gather3siv4sf: + case X86::BI__builtin_ia32_gather3siv4si: + case X86::BI__builtin_ia32_gather3siv8sf: + case X86::BI__builtin_ia32_gather3siv8si: + case X86::BI__builtin_ia32_gathersiv8df: + case X86::BI__builtin_ia32_gathersiv16sf: + case X86::BI__builtin_ia32_gatherdiv8df: + case X86::BI__builtin_ia32_gatherdiv16sf: + case X86::BI__builtin_ia32_gathersiv8di: + case X86::BI__builtin_ia32_gathersiv16si: + case X86::BI__builtin_ia32_gatherdiv8di: + case X86::BI__builtin_ia32_gatherdiv16si: + case X86::BI__builtin_ia32_scattersiv8df: + case X86::BI__builtin_ia32_scattersiv16sf: + case X86::BI__builtin_ia32_scatterdiv8df: + case X86::BI__builtin_ia32_scatterdiv16sf: + case X86::BI__builtin_ia32_scattersiv8di: + case X86::BI__builtin_ia32_scattersiv16si: + case X86::BI__builtin_ia32_scatterdiv8di: + case X86::BI__builtin_ia32_scatterdiv16si: + case X86::BI__builtin_ia32_scatterdiv2df: + case X86::BI__builtin_ia32_scatterdiv2di: + case X86::BI__builtin_ia32_scatterdiv4df: + case 
X86::BI__builtin_ia32_scatterdiv4di: + case X86::BI__builtin_ia32_scatterdiv4sf: + case X86::BI__builtin_ia32_scatterdiv4si: + case X86::BI__builtin_ia32_scatterdiv8sf: + case X86::BI__builtin_ia32_scatterdiv8si: + case X86::BI__builtin_ia32_scattersiv2df: + case X86::BI__builtin_ia32_scattersiv2di: + case X86::BI__builtin_ia32_scattersiv4df: + case X86::BI__builtin_ia32_scattersiv4di: + case X86::BI__builtin_ia32_scattersiv4sf: + case X86::BI__builtin_ia32_scattersiv4si: + case X86::BI__builtin_ia32_scattersiv8sf: + case X86::BI__builtin_ia32_scattersiv8si: + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_extractf64x4_mask: + case X86::BI__builtin_ia32_extractf32x4_mask: + case X86::BI__builtin_ia32_extracti64x4_mask: + case X86::BI__builtin_ia32_extracti32x4_mask: + case X86::BI__builtin_ia32_extractf32x8_mask: + case X86::BI__builtin_ia32_extracti32x8_mask: + case X86::BI__builtin_ia32_extractf32x4_256_mask: + case X86::BI__builtin_ia32_extracti32x4_256_mask: + case X86::BI__builtin_ia32_extractf64x2_256_mask: + case X86::BI__builtin_ia32_extracti64x2_256_mask: + case X86::BI__builtin_ia32_extractf64x2_512_mask: + case X86::BI__builtin_ia32_extracti64x2_512_mask: + case X86::BI__builtin_ia32_vinsertf128_pd256: + case X86::BI__builtin_ia32_vinsertf128_ps256: + case X86::BI__builtin_ia32_vinsertf128_si256: + case X86::BI__builtin_ia32_insert128i256: + case X86::BI__builtin_ia32_insertf64x4: + case X86::BI__builtin_ia32_insertf32x4: + case X86::BI__builtin_ia32_inserti64x4: + case X86::BI__builtin_ia32_inserti32x4: + case X86::BI__builtin_ia32_insertf32x8: + case X86::BI__builtin_ia32_inserti32x8: + case X86::BI__builtin_ia32_insertf32x4_256: + case X86::BI__builtin_ia32_inserti32x4_256: + case X86::BI__builtin_ia32_insertf64x2_256: + case X86::BI__builtin_ia32_inserti64x2_256: + case X86::BI__builtin_ia32_insertf64x2_512: + case X86::BI__builtin_ia32_inserti64x2_512: + case X86::BI__builtin_ia32_pmovqd512_mask: + case X86::BI__builtin_ia32_pmovwb512_mask: + case X86::BI__builtin_ia32_pblendw128: + case X86::BI__builtin_ia32_blendpd: + case X86::BI__builtin_ia32_blendps: + case X86::BI__builtin_ia32_blendpd256: + case X86::BI__builtin_ia32_blendps256: + case X86::BI__builtin_ia32_pblendw256: + case X86::BI__builtin_ia32_pblendd128: + case X86::BI__builtin_ia32_pblendd256: + case X86::BI__builtin_ia32_pshuflw: + case X86::BI__builtin_ia32_pshuflw256: + case X86::BI__builtin_ia32_pshuflw512: + case X86::BI__builtin_ia32_pshufhw: + case X86::BI__builtin_ia32_pshufhw256: + case X86::BI__builtin_ia32_pshufhw512: + case X86::BI__builtin_ia32_pshufd: + case X86::BI__builtin_ia32_pshufd256: + case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilpd512: + case X86::BI__builtin_ia32_vpermilps512: + case X86::BI__builtin_ia32_shufpd: + case X86::BI__builtin_ia32_shufpd256: + case X86::BI__builtin_ia32_shufpd512: + case X86::BI__builtin_ia32_shufps: + case X86::BI__builtin_ia32_shufps256: + case X86::BI__builtin_ia32_shufps512: + case X86::BI__builtin_ia32_permdi256: + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi512: + case X86::BI__builtin_ia32_permdf512: + case X86::BI__builtin_ia32_palignr128: + case 
X86::BI__builtin_ia32_palignr256:
+  case X86::BI__builtin_ia32_palignr512:
+  case X86::BI__builtin_ia32_alignd128:
+  case X86::BI__builtin_ia32_alignd256:
+  case X86::BI__builtin_ia32_alignd512:
+  case X86::BI__builtin_ia32_alignq128:
+  case X86::BI__builtin_ia32_alignq256:
+  case X86::BI__builtin_ia32_alignq512:
+  case X86::BI__builtin_ia32_shuf_f32x4_256:
+  case X86::BI__builtin_ia32_shuf_f64x2_256:
+  case X86::BI__builtin_ia32_shuf_i32x4_256:
+  case X86::BI__builtin_ia32_shuf_i64x2_256:
+  case X86::BI__builtin_ia32_shuf_f32x4:
+  case X86::BI__builtin_ia32_shuf_f64x2:
+  case X86::BI__builtin_ia32_shuf_i32x4:
+  case X86::BI__builtin_ia32_shuf_i64x2:
+  case X86::BI__builtin_ia32_vperm2f128_pd256:
+  case X86::BI__builtin_ia32_vperm2f128_ps256:
+  case X86::BI__builtin_ia32_vperm2f128_si256:
+  case X86::BI__builtin_ia32_permti256:
+  case X86::BI__builtin_ia32_pslldqi128_byteshift:
+  case X86::BI__builtin_ia32_pslldqi256_byteshift:
+  case X86::BI__builtin_ia32_pslldqi512_byteshift:
+  case X86::BI__builtin_ia32_psrldqi128_byteshift:
+  case X86::BI__builtin_ia32_psrldqi256_byteshift:
+  case X86::BI__builtin_ia32_psrldqi512_byteshift:
+  case X86::BI__builtin_ia32_kshiftliqi:
+  case X86::BI__builtin_ia32_kshiftlihi:
+  case X86::BI__builtin_ia32_kshiftlisi:
+  case X86::BI__builtin_ia32_kshiftlidi:
+  case X86::BI__builtin_ia32_kshiftriqi:
+  case X86::BI__builtin_ia32_kshiftrihi:
+  case X86::BI__builtin_ia32_kshiftrisi:
+  case X86::BI__builtin_ia32_kshiftridi:
+  case X86::BI__builtin_ia32_vprotbi:
+  case X86::BI__builtin_ia32_vprotwi:
+  case X86::BI__builtin_ia32_vprotdi:
+  case X86::BI__builtin_ia32_vprotqi:
+  case X86::BI__builtin_ia32_prold128:
+  case X86::BI__builtin_ia32_prold256:
+  case X86::BI__builtin_ia32_prold512:
+  case X86::BI__builtin_ia32_prolq128:
+  case X86::BI__builtin_ia32_prolq256:
+  case X86::BI__builtin_ia32_prolq512:
+  case X86::BI__builtin_ia32_prord128:
+  case X86::BI__builtin_ia32_prord256:
+  case X86::BI__builtin_ia32_prord512:
+  case X86::BI__builtin_ia32_prorq128:
+  case X86::BI__builtin_ia32_prorq256:
+  case X86::BI__builtin_ia32_prorq512:
+  case X86::BI__builtin_ia32_selectb_128:
+  case X86::BI__builtin_ia32_selectb_256:
+  case X86::BI__builtin_ia32_selectb_512:
+  case X86::BI__builtin_ia32_selectw_128:
+  case X86::BI__builtin_ia32_selectw_256:
+  case X86::BI__builtin_ia32_selectw_512:
+  case X86::BI__builtin_ia32_selectd_128:
+  case X86::BI__builtin_ia32_selectd_256:
+  case X86::BI__builtin_ia32_selectd_512:
+  case X86::BI__builtin_ia32_selectq_128:
+  case X86::BI__builtin_ia32_selectq_256:
+  case X86::BI__builtin_ia32_selectq_512:
+  case X86::BI__builtin_ia32_selectph_128:
+  case X86::BI__builtin_ia32_selectph_256:
+  case X86::BI__builtin_ia32_selectph_512:
+  case X86::BI__builtin_ia32_selectpbf_128:
+  case X86::BI__builtin_ia32_selectpbf_256:
+  case X86::BI__builtin_ia32_selectpbf_512:
+  case X86::BI__builtin_ia32_selectps_128:
+  case X86::BI__builtin_ia32_selectps_256:
+  case X86::BI__builtin_ia32_selectps_512:
+  case X86::BI__builtin_ia32_selectpd_128:
+  case X86::BI__builtin_ia32_selectpd_256:
+  case X86::BI__builtin_ia32_selectpd_512:
+  case X86::BI__builtin_ia32_selectsh_128:
+  case X86::BI__builtin_ia32_selectsbf_128:
+  case X86::BI__builtin_ia32_selectss_128:
+  case X86::BI__builtin_ia32_selectsd_128:
+  case X86::BI__builtin_ia32_cmpb128_mask:
+  case X86::BI__builtin_ia32_cmpb256_mask:
+  case X86::BI__builtin_ia32_cmpb512_mask:
+  case X86::BI__builtin_ia32_cmpw128_mask:
+  case X86::BI__builtin_ia32_cmpw256_mask:
+  case X86::BI__builtin_ia32_cmpw512_mask:
+  case X86::BI__builtin_ia32_cmpd128_mask:
+  case X86::BI__builtin_ia32_cmpd256_mask:
+  case X86::BI__builtin_ia32_cmpd512_mask:
+  case X86::BI__builtin_ia32_cmpq128_mask:
+  case X86::BI__builtin_ia32_cmpq256_mask:
+  case X86::BI__builtin_ia32_cmpq512_mask:
+  case X86::BI__builtin_ia32_ucmpb128_mask:
+  case X86::BI__builtin_ia32_ucmpb256_mask:
+  case X86::BI__builtin_ia32_ucmpb512_mask:
+  case X86::BI__builtin_ia32_ucmpw128_mask:
+  case X86::BI__builtin_ia32_ucmpw256_mask:
+  case X86::BI__builtin_ia32_ucmpw512_mask:
+  case X86::BI__builtin_ia32_ucmpd128_mask:
+  case X86::BI__builtin_ia32_ucmpd256_mask:
+  case X86::BI__builtin_ia32_ucmpd512_mask:
+  case X86::BI__builtin_ia32_ucmpq128_mask:
+  case X86::BI__builtin_ia32_ucmpq256_mask:
+  case X86::BI__builtin_ia32_ucmpq512_mask:
+  case X86::BI__builtin_ia32_vpcomb:
+  case X86::BI__builtin_ia32_vpcomw:
+  case X86::BI__builtin_ia32_vpcomd:
+  case X86::BI__builtin_ia32_vpcomq:
+  case X86::BI__builtin_ia32_vpcomub:
+  case X86::BI__builtin_ia32_vpcomuw:
+  case X86::BI__builtin_ia32_vpcomud:
+  case X86::BI__builtin_ia32_vpcomuq:
+  case X86::BI__builtin_ia32_kortestcqi:
+  case X86::BI__builtin_ia32_kortestchi:
+  case X86::BI__builtin_ia32_kortestcsi:
+  case X86::BI__builtin_ia32_kortestcdi:
+  case X86::BI__builtin_ia32_kortestzqi:
+  case X86::BI__builtin_ia32_kortestzhi:
+  case X86::BI__builtin_ia32_kortestzsi:
+  case X86::BI__builtin_ia32_kortestzdi:
+  case X86::BI__builtin_ia32_ktestcqi:
+  case X86::BI__builtin_ia32_ktestzqi:
+  case X86::BI__builtin_ia32_ktestchi:
+  case X86::BI__builtin_ia32_ktestzhi:
+  case X86::BI__builtin_ia32_ktestcsi:
+  case X86::BI__builtin_ia32_ktestzsi:
+  case X86::BI__builtin_ia32_ktestcdi:
+  case X86::BI__builtin_ia32_ktestzdi:
+  case X86::BI__builtin_ia32_kaddqi:
+  case X86::BI__builtin_ia32_kaddhi:
+  case X86::BI__builtin_ia32_kaddsi:
+  case X86::BI__builtin_ia32_kadddi:
+  case X86::BI__builtin_ia32_kandqi:
+  case X86::BI__builtin_ia32_kandhi:
+  case X86::BI__builtin_ia32_kandsi:
+  case X86::BI__builtin_ia32_kanddi:
+  case X86::BI__builtin_ia32_kandnqi:
+  case X86::BI__builtin_ia32_kandnhi:
+  case X86::BI__builtin_ia32_kandnsi:
+  case X86::BI__builtin_ia32_kandndi:
+  case X86::BI__builtin_ia32_korqi:
+  case X86::BI__builtin_ia32_korhi:
+  case X86::BI__builtin_ia32_korsi:
+  case X86::BI__builtin_ia32_kordi:
+  case X86::BI__builtin_ia32_kxnorqi:
+  case X86::BI__builtin_ia32_kxnorhi:
+  case X86::BI__builtin_ia32_kxnorsi:
+  case X86::BI__builtin_ia32_kxnordi:
+  case X86::BI__builtin_ia32_kxorqi:
+  case X86::BI__builtin_ia32_kxorhi:
+  case X86::BI__builtin_ia32_kxorsi:
+  case X86::BI__builtin_ia32_kxordi:
+  case X86::BI__builtin_ia32_knotqi:
+  case X86::BI__builtin_ia32_knothi:
+  case X86::BI__builtin_ia32_knotsi:
+  case X86::BI__builtin_ia32_knotdi:
+  case X86::BI__builtin_ia32_kmovb:
+  case X86::BI__builtin_ia32_kmovw:
+  case X86::BI__builtin_ia32_kmovd:
+  case X86::BI__builtin_ia32_kmovq:
+  case X86::BI__builtin_ia32_kunpckdi:
+  case X86::BI__builtin_ia32_kunpcksi:
+  case X86::BI__builtin_ia32_kunpckhi:
+  case X86::BI__builtin_ia32_sqrtsh_round_mask:
+  case X86::BI__builtin_ia32_sqrtsd_round_mask:
+  case X86::BI__builtin_ia32_sqrtss_round_mask:
+  case X86::BI__builtin_ia32_sqrtpd256:
+  case X86::BI__builtin_ia32_sqrtpd:
+  case X86::BI__builtin_ia32_sqrtps256:
+  case X86::BI__builtin_ia32_sqrtps:
+  case X86::BI__builtin_ia32_sqrtph256:
+  case X86::BI__builtin_ia32_sqrtph:
+  case X86::BI__builtin_ia32_sqrtph512:
+  case X86::BI__builtin_ia32_vsqrtbf16256:
+  case X86::BI__builtin_ia32_vsqrtbf16:
+  case X86::BI__builtin_ia32_vsqrtbf16512:
+  case X86::BI__builtin_ia32_sqrtps512:
+  case X86::BI__builtin_ia32_sqrtpd512:
+  case X86::BI__builtin_ia32_pmuludq128:
+  case X86::BI__builtin_ia32_pmuludq256:
+  case X86::BI__builtin_ia32_pmuludq512:
+  case X86::BI__builtin_ia32_pmuldq128:
+  case X86::BI__builtin_ia32_pmuldq256:
+  case X86::BI__builtin_ia32_pmuldq512:
+  case X86::BI__builtin_ia32_pternlogd512_mask:
+  case X86::BI__builtin_ia32_pternlogq512_mask:
+  case X86::BI__builtin_ia32_pternlogd128_mask:
+  case X86::BI__builtin_ia32_pternlogd256_mask:
+  case X86::BI__builtin_ia32_pternlogq128_mask:
+  case X86::BI__builtin_ia32_pternlogq256_mask:
+  case X86::BI__builtin_ia32_pternlogd512_maskz:
+  case X86::BI__builtin_ia32_pternlogq512_maskz:
+  case X86::BI__builtin_ia32_pternlogd128_maskz:
+  case X86::BI__builtin_ia32_pternlogd256_maskz:
+  case X86::BI__builtin_ia32_pternlogq128_maskz:
+  case X86::BI__builtin_ia32_pternlogq256_maskz:
+  case X86::BI__builtin_ia32_vpshldd128:
+  case X86::BI__builtin_ia32_vpshldd256:
+  case X86::BI__builtin_ia32_vpshldd512:
+  case X86::BI__builtin_ia32_vpshldq128:
+  case X86::BI__builtin_ia32_vpshldq256:
+  case X86::BI__builtin_ia32_vpshldq512:
+  case X86::BI__builtin_ia32_vpshldw128:
+  case X86::BI__builtin_ia32_vpshldw256:
+  case X86::BI__builtin_ia32_vpshldw512:
+  case X86::BI__builtin_ia32_vpshrdd128:
+  case X86::BI__builtin_ia32_vpshrdd256:
+  case X86::BI__builtin_ia32_vpshrdd512:
+  case X86::BI__builtin_ia32_vpshrdq128:
+  case X86::BI__builtin_ia32_vpshrdq256:
+  case X86::BI__builtin_ia32_vpshrdq512:
+  case X86::BI__builtin_ia32_vpshrdw128:
+  case X86::BI__builtin_ia32_vpshrdw256:
+  case X86::BI__builtin_ia32_vpshrdw512:
+  case X86::BI__builtin_ia32_reduce_fadd_pd512:
+  case X86::BI__builtin_ia32_reduce_fadd_ps512:
+  case X86::BI__builtin_ia32_reduce_fadd_ph512:
+  case X86::BI__builtin_ia32_reduce_fadd_ph256:
+  case X86::BI__builtin_ia32_reduce_fadd_ph128:
+  case X86::BI__builtin_ia32_reduce_fmul_pd512:
+  case X86::BI__builtin_ia32_reduce_fmul_ps512:
+  case X86::BI__builtin_ia32_reduce_fmul_ph512:
+  case X86::BI__builtin_ia32_reduce_fmul_ph256:
+  case X86::BI__builtin_ia32_reduce_fmul_ph128:
+  case X86::BI__builtin_ia32_reduce_fmax_pd512:
+  case X86::BI__builtin_ia32_reduce_fmax_ps512:
+  case X86::BI__builtin_ia32_reduce_fmax_ph512:
+  case X86::BI__builtin_ia32_reduce_fmax_ph256:
+  case X86::BI__builtin_ia32_reduce_fmax_ph128:
+  case X86::BI__builtin_ia32_reduce_fmin_pd512:
+  case X86::BI__builtin_ia32_reduce_fmin_ps512:
+  case X86::BI__builtin_ia32_reduce_fmin_ph512:
+  case X86::BI__builtin_ia32_reduce_fmin_ph256:
+  case X86::BI__builtin_ia32_reduce_fmin_ph128:
+  case X86::BI__builtin_ia32_rdrand16_step:
+  case X86::BI__builtin_ia32_rdrand32_step:
+  case X86::BI__builtin_ia32_rdrand64_step:
+  case X86::BI__builtin_ia32_rdseed16_step:
+  case X86::BI__builtin_ia32_rdseed32_step:
+  case X86::BI__builtin_ia32_rdseed64_step:
+  case X86::BI__builtin_ia32_addcarryx_u32:
+  case X86::BI__builtin_ia32_addcarryx_u64:
+  case X86::BI__builtin_ia32_subborrow_u32:
+  case X86::BI__builtin_ia32_subborrow_u64:
+  case X86::BI__builtin_ia32_fpclassps128_mask:
+  case X86::BI__builtin_ia32_fpclassps256_mask:
+  case X86::BI__builtin_ia32_fpclassps512_mask:
+  case X86::BI__builtin_ia32_vfpclassbf16128_mask:
+  case X86::BI__builtin_ia32_vfpclassbf16256_mask:
+  case X86::BI__builtin_ia32_vfpclassbf16512_mask:
+  case X86::BI__builtin_ia32_fpclassph128_mask:
+  case X86::BI__builtin_ia32_fpclassph256_mask:
+  case X86::BI__builtin_ia32_fpclassph512_mask:
+  case X86::BI__builtin_ia32_fpclasspd128_mask:
+  case X86::BI__builtin_ia32_fpclasspd256_mask:
+  case X86::BI__builtin_ia32_fpclasspd512_mask:
+  case X86::BI__builtin_ia32_vp2intersect_q_512:
+  case X86::BI__builtin_ia32_vp2intersect_q_256:
+  case X86::BI__builtin_ia32_vp2intersect_q_128:
+  case X86::BI__builtin_ia32_vp2intersect_d_512:
+  case X86::BI__builtin_ia32_vp2intersect_d_256:
+  case X86::BI__builtin_ia32_vp2intersect_d_128:
+  case X86::BI__builtin_ia32_vpmultishiftqb128:
+  case X86::BI__builtin_ia32_vpmultishiftqb256:
+  case X86::BI__builtin_ia32_vpmultishiftqb512:
+  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+  case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
+  case X86::BI__builtin_ia32_cmpeqps:
+  case X86::BI__builtin_ia32_cmpeqpd:
+  case X86::BI__builtin_ia32_cmpltps:
+  case X86::BI__builtin_ia32_cmpltpd:
+  case X86::BI__builtin_ia32_cmpleps:
+  case X86::BI__builtin_ia32_cmplepd:
+  case X86::BI__builtin_ia32_cmpunordps:
+  case X86::BI__builtin_ia32_cmpunordpd:
+  case X86::BI__builtin_ia32_cmpneqps:
+  case X86::BI__builtin_ia32_cmpneqpd:
+  case X86::BI__builtin_ia32_cmpnltps:
+  case X86::BI__builtin_ia32_cmpnltpd:
+  case X86::BI__builtin_ia32_cmpnleps:
+  case X86::BI__builtin_ia32_cmpnlepd:
+  case X86::BI__builtin_ia32_cmpordps:
+  case X86::BI__builtin_ia32_cmpordpd:
+  case X86::BI__builtin_ia32_cmpph128_mask:
+  case X86::BI__builtin_ia32_cmpph256_mask:
+  case X86::BI__builtin_ia32_cmpph512_mask:
+  case X86::BI__builtin_ia32_cmpps128_mask:
+  case X86::BI__builtin_ia32_cmpps256_mask:
+  case X86::BI__builtin_ia32_cmpps512_mask:
+  case X86::BI__builtin_ia32_cmppd128_mask:
+  case X86::BI__builtin_ia32_cmppd256_mask:
+  case X86::BI__builtin_ia32_cmppd512_mask:
+  case X86::BI__builtin_ia32_vcmpbf16512_mask:
+  case X86::BI__builtin_ia32_vcmpbf16256_mask:
+  case X86::BI__builtin_ia32_vcmpbf16128_mask:
+  case X86::BI__builtin_ia32_cmpps:
+  case X86::BI__builtin_ia32_cmpps256:
+  case X86::BI__builtin_ia32_cmppd:
+  case X86::BI__builtin_ia32_cmppd256:
+  case X86::BI__builtin_ia32_cmpeqss:
+  case X86::BI__builtin_ia32_cmpltss:
+  case X86::BI__builtin_ia32_cmpless:
+  case X86::BI__builtin_ia32_cmpunordss:
+  case X86::BI__builtin_ia32_cmpneqss:
+  case X86::BI__builtin_ia32_cmpnltss:
+  case X86::BI__builtin_ia32_cmpnless:
+  case X86::BI__builtin_ia32_cmpordss:
+  case X86::BI__builtin_ia32_cmpeqsd:
+  case X86::BI__builtin_ia32_cmpltsd:
+  case X86::BI__builtin_ia32_cmplesd:
+  case X86::BI__builtin_ia32_cmpunordsd:
+  case X86::BI__builtin_ia32_cmpneqsd:
+  case X86::BI__builtin_ia32_cmpnltsd:
+  case X86::BI__builtin_ia32_cmpnlesd:
+  case X86::BI__builtin_ia32_cmpordsd:
+  case X86::BI__builtin_ia32_vcvtph2ps_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
+  case X86::BI__builtin_ia32_cvtsbf162ss_32:
+  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
+  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+  case X86::BI__cpuid:
+  case X86::BI__cpuidex:
+  case X86::BI__emul:
+  case X86::BI__emulu:
+  case X86::BI__mulh:
+  case X86::BI__umulh:
+  case X86::BI_mul128:
+  case X86::BI_umul128:
+  case X86::BI__faststorefence:
+  case X86::BI__shiftleft128:
+  case X86::BI__shiftright128:
+  case X86::BI_ReadWriteBarrier:
+  case X86::BI_ReadBarrier:
+  case X86::BI_WriteBarrier:
+  case X86::BI_AddressOfReturnAddress:
+  case X86::BI__stosb:
+  case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
+  case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
+  case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
+  case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
+  case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
+  case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
+  case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
+  case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
+  case X86::BI__ud2:
+  case X86::BI__int2c:
+  case X86::BI__readfsbyte:
+  case X86::BI__readfsword:
+  case X86::BI__readfsdword:
+  case X86::BI__readfsqword:
+  case X86::BI__readgsbyte:
+  case X86::BI__readgsword:
+  case X86::BI__readgsdword:
+  case X86::BI__readgsqword:
+  case X86::BI__builtin_ia32_encodekey128_u32:
+  case X86::BI__builtin_ia32_encodekey256_u32:
+  case X86::BI__builtin_ia32_aesenc128kl_u8:
+  case X86::BI__builtin_ia32_aesdec128kl_u8:
+  case X86::BI__builtin_ia32_aesenc256kl_u8:
+  case X86::BI__builtin_ia32_aesdec256kl_u8:
+  case X86::BI__builtin_ia32_aesencwide128kl_u8:
+  case X86::BI__builtin_ia32_aesdecwide128kl_u8:
+  case X86::BI__builtin_ia32_aesencwide256kl_u8:
+  case X86::BI__builtin_ia32_aesdecwide256kl_u8:
+  case X86::BI__builtin_ia32_vfcmaddcph512_mask:
+  case X86::BI__builtin_ia32_vfmaddcph512_mask:
+  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
+  case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
+  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
+  case X86::BI__builtin_ia32_prefetchi:
+    cgm.errorNYI(e->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  }
+}
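Every builtin listed in this switch currently funnels into the same not-yet-implemented diagnostic below it. As a concrete illustration (hypothetical user code, not part of this patch; assumes -mavx2 and the -fclangir pipeline):

#include <immintrin.h>

// _mm256_alignr_epi8 expands to __builtin_ia32_palignr256, one of the cases
// above, so CIRGen would currently report:
//   unimplemented X86 builtin call: __builtin_ia32_palignr256
__m256i concat_shift_right(__m256i a, __m256i b) {
  return _mm256_alignr_epi8(a, b, 4);
}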
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
index df42af8..eef3739 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
@@ -37,6 +37,10 @@ CIRGenCXXABI::AddedStructorArgCounts CIRGenCXXABI::addImplicitConstructorArgs(
                                 addedArgs.suffix.size());
 }
 
+CatchTypeInfo CIRGenCXXABI::getCatchAllTypeInfo() {
+  return CatchTypeInfo{{}, 0};
+}
+
 void CIRGenCXXABI::buildThisParam(CIRGenFunction &cgf,
                                   FunctionArgList &params) {
   const auto *md = cast<CXXMethodDecl>(cgf.curGD.getDecl());
@@ -81,8 +85,7 @@ CharUnits CIRGenCXXABI::getArrayCookieSize(const CXXNewExpr *e) {
   if (!requiresArrayCookie(e))
     return CharUnits::Zero();
 
-  cgm.errorNYI(e->getSourceRange(), "CIRGenCXXABI::getArrayCookieSize");
-  return CharUnits::Zero();
+  return getArrayCookieSizeImpl(e->getAllocatedType());
 }
 
 bool CIRGenCXXABI::requiresArrayCookie(const CXXNewExpr *e) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
index 6d3741c4..c78f9b0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_LIB_CIR_CIRGENCXXABI_H
 
 #include "CIRGenCall.h"
+#include "CIRGenCleanup.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
 
@@ -155,6 +156,8 @@ public:
   /// Loads the incoming C++ this pointer as it was passed by the caller.
   mlir::Value loadIncomingCXXThis(CIRGenFunction &cgf);
 
+  virtual CatchTypeInfo getCatchAllTypeInfo();
+
   /// Get the implicit (second) parameter that comes after the "this" pointer,
   /// or nullptr if there is isn't one.
   virtual mlir::Value getCXXDestructorImplicitParam(CIRGenFunction &cgf,
@@ -299,8 +302,28 @@ public:
   ///   - non-array allocations never need a cookie
   ///   - calls to \::operator new(size_t, void*) never need a cookie
   ///
-  /// \param E - the new-expression being allocated.
+  /// \param e - the new-expression being allocated.
   virtual CharUnits getArrayCookieSize(const CXXNewExpr *e);
+
+  /// Initialize the array cookie for the given allocation.
+  ///
+  /// \param newPtr - a char* which is the presumed-non-null
+  ///   return value of the allocation function
+  /// \param numElements - the computed number of elements,
+  ///   potentially collapsed from the multidimensional array case;
+  ///   always a size_t
+  /// \param elementType - the base element allocated type,
+  ///   i.e. the allocated type after stripping all array types
+  virtual Address initializeArrayCookie(CIRGenFunction &cgf, Address newPtr,
+                                        mlir::Value numElements,
+                                        const CXXNewExpr *e,
+                                        QualType elementType) = 0;
+
+protected:
+  /// Returns the extra size required in order to store the array
+  /// cookie for the given type. Assumes that an array cookie is
+  /// required.
+  virtual CharUnits getArrayCookieSizeImpl(QualType elementType) = 0;
 };
 
 /// Creates an Itanium-family ABI
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
index 8700697..851328a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
@@ -28,6 +28,46 @@ using namespace clang::CIRGen;
 // CIRGenFunction cleanup related
 //===----------------------------------------------------------------------===//
 
+/// Build an unconditional branch to the lexical scope cleanup block,
+/// or to the labeled block if it has already been resolved.
+///
+/// Track, on a per-scope basis, the goto's we still need to fix up.
+cir::BrOp CIRGenFunction::emitBranchThroughCleanup(mlir::Location loc,
+                                                   JumpDest dest) {
+  // Insert a branch: to the cleanup block (unresolved) or to the already
+  // materialized label. Keep track of unresolved goto's.
+  assert(dest.getBlock() && "assumes incoming valid dest");
+  auto brOp = cir::BrOp::create(builder, loc, dest.getBlock());
+
+  // Calculate the innermost active normal cleanup.
+  EHScopeStack::stable_iterator topCleanup =
+      ehStack.getInnermostActiveNormalCleanup();
+
+  // If we're not in an active normal cleanup scope, or if the
+  // destination scope is within the innermost active normal cleanup
+  // scope, we don't need to worry about fixups.
+  if (topCleanup == ehStack.stable_end() ||
+      topCleanup.encloses(dest.getScopeDepth())) { // works for invalid
+    // FIXME(cir): should we clear insertion point here?
+    return brOp;
+  }
+
+  // If we can't resolve the destination cleanup scope, just add this
+  // to the current cleanup scope as a branch fixup.
+  if (!dest.getScopeDepth().isValid()) {
+    BranchFixup &fixup = ehStack.addBranchFixup();
+    fixup.destination = dest.getBlock();
+    fixup.destinationIndex = dest.getDestIndex();
+    fixup.initialBranch = brOp;
+    fixup.optimisticBranchBlock = nullptr;
+    // FIXME(cir): should we clear insertion point here?
+    return brOp;
+  }
+
+  cgm.errorNYI(loc, "emitBranchThroughCleanup: valid destination scope depth");
+  return brOp;
+}
+
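The fixup arm above handles forward jumps whose destination scope depth is not yet known. A minimal sketch of C++ that exercises it (illustrative only, assuming goto handling is wired up to emitBranchThroughCleanup):

struct A { ~A(); };

void f(bool cond) {
  {
    A a;
    if (cond)
      goto done; // `done` has no resolved scope depth yet: the branch is
                 // recorded as a BranchFixup and threaded through ~A() later.
  }
done:
  return;
}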
 /// Emits all the code to cause the given temporary to be cleaned up.
 void CIRGenFunction::emitCXXTemporary(const CXXTemporary *temporary,
                                       QualType tempType, Address ptr) {
@@ -40,6 +80,19 @@ void CIRGenFunction::emitCXXTemporary(const CXXTemporary *temporary,
 
 void EHScopeStack::Cleanup::anchor() {}
 
+EHScopeStack::stable_iterator
+EHScopeStack::getInnermostActiveNormalCleanup() const {
+  stable_iterator si = getInnermostNormalCleanup();
+  stable_iterator se = stable_end();
+  while (si != se) {
+    EHCleanupScope &cleanup = llvm::cast<EHCleanupScope>(*find(si));
+    if (cleanup.isActive())
+      return si;
+    si = cleanup.getEnclosingNormalCleanup();
+  }
+  return stable_end();
+}
+
 /// Push an entry of the given size onto this protected-scope stack.
 char *EHScopeStack::allocate(size_t size) {
   size = llvm::alignTo(size, ScopeStackAlignment);
@@ -75,14 +128,30 @@ void EHScopeStack::deallocate(size_t size) {
   startOfData += llvm::alignTo(size, ScopeStackAlignment);
 }
 
+/// Remove any 'null' fixups on the stack. However, we can't pop more
+/// fixups than the fixup depth on the innermost normal cleanup, or
+/// else fixups that we try to add to that cleanup will end up in the
+/// wrong place. We *could* try to shrink fixup depths, but that's
+/// actually a lot of work for little benefit.
+void EHScopeStack::popNullFixups() {
+  // We expect this to only be called when there's still an innermost
+  // normal cleanup; otherwise there really shouldn't be any fixups.
+  cgf->cgm.errorNYI("popNullFixups");
+}
+
 void *EHScopeStack::pushCleanup(CleanupKind kind, size_t size) {
   char *buffer = allocate(EHCleanupScope::getSizeForCleanupSize(size));
+  bool isNormalCleanup = kind & NormalCleanup;
   bool isEHCleanup = kind & EHCleanup;
   bool isLifetimeMarker = kind & LifetimeMarker;
 
   assert(!cir::MissingFeatures::innermostEHScope());
 
-  EHCleanupScope *scope = new (buffer) EHCleanupScope(size);
+  EHCleanupScope *scope = new (buffer)
+      EHCleanupScope(size, branchFixups.size(), innermostNormalCleanup);
+
+  if (isNormalCleanup)
+    innermostNormalCleanup = stable_begin();
 
   if (isLifetimeMarker)
     cgf->cgm.errorNYI("push lifetime marker cleanup");
@@ -100,12 +169,30 @@ void EHScopeStack::popCleanup() {
   assert(isa<EHCleanupScope>(*begin()));
   EHCleanupScope &cleanup = cast<EHCleanupScope>(*begin());
+  innermostNormalCleanup = cleanup.getEnclosingNormalCleanup();
   deallocate(cleanup.getAllocatedSize());
 
   // Destroy the cleanup.
   cleanup.destroy();
 
-  assert(!cir::MissingFeatures::ehCleanupBranchFixups());
+  // Check whether we can shrink the branch-fixups stack.
+  if (!branchFixups.empty()) {
+    // If we no longer have any normal cleanups, all the fixups are
+    // complete.
+    if (!hasNormalCleanups()) {
+      branchFixups.clear();
+    } else {
+      // Otherwise we can still trim out unnecessary nulls.
+      popNullFixups();
+    }
+  }
+}
+
+EHCatchScope *EHScopeStack::pushCatch(unsigned numHandlers) {
+  char *buffer = allocate(EHCatchScope::getSizeForNumHandlers(numHandlers));
+  assert(!cir::MissingFeatures::innermostEHScope());
+  EHCatchScope *scope = new (buffer) EHCatchScope(numHandlers);
+  return scope;
+}
 
 static void emitCleanup(CIRGenFunction &cgf, EHScopeStack::Cleanup *cleanup) {
@@ -116,6 +203,18 @@ static void emitCleanup(CIRGenFunction &cgf, EHScopeStack::Cleanup *cleanup) {
   assert(cgf.haveInsertPoint() && "cleanup ended with no insertion point?");
 }
 
+static mlir::Block *createNormalEntry(CIRGenFunction &cgf,
+                                      EHCleanupScope &scope) {
+  assert(scope.isNormalCleanup());
+  mlir::Block *entry = scope.getNormalBlock();
+  if (!entry) {
+    mlir::OpBuilder::InsertionGuard guard(cgf.getBuilder());
+    entry = cgf.curLexScope->getOrCreateCleanupBlock(cgf.getBuilder());
+    scope.setNormalBlock(entry);
+  }
+  return entry;
+}
+
 /// Pops a cleanup block. If the block includes a normal cleanup, the
 /// current insertion point is threaded through the cleanup, as are
 /// any branch fixups on the cleanup.
@@ -123,17 +222,21 @@ void CIRGenFunction::popCleanupBlock() {
   assert(!ehStack.empty() && "cleanup stack is empty!");
   assert(isa<EHCleanupScope>(*ehStack.begin()) && "top not a cleanup!");
   EHCleanupScope &scope = cast<EHCleanupScope>(*ehStack.begin());
+  assert(scope.getFixupDepth() <= ehStack.getNumBranchFixups());
 
   // Remember activation information.
   bool isActive = scope.isActive();
 
-  assert(!cir::MissingFeatures::ehCleanupBranchFixups());
+  // - whether there are branch fix-ups through this cleanup
+  unsigned fixupDepth = scope.getFixupDepth();
+  bool hasFixups = ehStack.getNumBranchFixups() != fixupDepth;
 
   // - whether there's a fallthrough
   mlir::Block *fallthroughSource = builder.getInsertionBlock();
   bool hasFallthrough = fallthroughSource != nullptr && isActive;
 
-  bool requiresNormalCleanup = scope.isNormalCleanup() && hasFallthrough;
+  bool requiresNormalCleanup =
+      scope.isNormalCleanup() && (hasFixups || hasFallthrough);
 
   // If we don't need the cleanup at all, we're done.
   assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup());
@@ -168,9 +271,119 @@ void CIRGenFunction::popCleanupBlock() {
 
   assert(!cir::MissingFeatures::ehCleanupFlags());
 
-  ehStack.popCleanup();
-  scope.markEmitted();
-  emitCleanup(*this, cleanup);
+  // If we have a fallthrough and no other need for the cleanup,
+  // emit it directly.
+  if (hasFallthrough && !hasFixups) {
+    assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup());
+    ehStack.popCleanup();
+    scope.markEmitted();
+    emitCleanup(*this, cleanup);
+  } else {
+    // Otherwise, the best approach is to thread everything through
+    // the cleanup block and then try to clean up after ourselves.
+
+    // Force the entry block to exist.
+    mlir::Block *normalEntry = createNormalEntry(*this, scope);
+
+    // I. Set up the fallthrough edge in.
+    mlir::OpBuilder::InsertPoint savedInactiveFallthroughIP;
+
+    // If there's a fallthrough, we need to store the cleanup
+    // destination index. For fall-throughs this is always zero.
+    if (hasFallthrough) {
+      assert(!cir::MissingFeatures::ehCleanupHasPrebranchedFallthrough());
+
+    } else if (fallthroughSource) {
+      // Otherwise, save and clear the IP if we don't have fallthrough
+      // because the cleanup is inactive.
+      assert(!isActive && "source without fallthrough for active cleanup");
+      savedInactiveFallthroughIP = builder.saveInsertionPoint();
+    }
+
+    // II. Emit the entry block. This implicitly branches to it if
+    // we have fallthrough. All the fixups and existing branches
+    // should already be branched to it.
+    builder.setInsertionPointToEnd(normalEntry);
+
+    // intercept normal cleanup to mark SEH scope end
+    assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup());
+
+    // III. Figure out where we're going and build the cleanup
+    // epilogue.
+    bool hasEnclosingCleanups =
+        (scope.getEnclosingNormalCleanup() != ehStack.stable_end());
+
+    // Compute the branch-through dest if we need it:
+    //   - if there are branch-throughs threaded through the scope
+    //   - if fall-through is a branch-through
+    //   - if there are fixups that will be optimistically forwarded
+    //     to the enclosing cleanup
+    assert(!cir::MissingFeatures::cleanupBranchThrough());
+    if (hasFixups && hasEnclosingCleanups)
+      cgm.errorNYI("cleanup branch-through dest");
+
+    mlir::Block *fallthroughDest = nullptr;
+
+    // If there's exactly one branch-after and no other threads,
+    // we can route it without a switch.
+    // Skip for SEH, since ExitSwitch is used to generate code to indicate
+    // abnormal termination. (SEH: Except _leave and fall-through at
+    // the end, all other exits in a _try (return/goto/continue/break)
+    // are considered as abnormal terminations, using NormalCleanupDestSlot
+    // to indicate abnormal termination)
+    assert(!cir::MissingFeatures::cleanupBranchThrough());
+    assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup());
+
+    // IV. Pop the cleanup and emit it.
+    scope.markEmitted();
+    ehStack.popCleanup();
+    assert(ehStack.hasNormalCleanups() == hasEnclosingCleanups);
+
+    emitCleanup(*this, cleanup);
+
+    // Append the prepared cleanup prologue from above.
+    assert(!cir::MissingFeatures::cleanupAppendInsts());
+
+    // Optimistically hope that any fixups will continue falling through.
+    if (fixupDepth != ehStack.getNumBranchFixups())
+      cgm.errorNYI("cleanup fixup depth mismatch");
+
+    // V. Set up the fallthrough edge out.
+
+    // Case 1: a fallthrough source exists but doesn't branch to the
+    // cleanup because the cleanup is inactive.
+    if (!hasFallthrough && fallthroughSource) {
+      // Prebranched fallthrough was forwarded earlier.
+      // Non-prebranched fallthrough doesn't need to be forwarded.
+      // Either way, all we need to do is restore the IP we cleared before.
+      assert(!isActive);
+      cgm.errorNYI("cleanup inactive fallthrough");
+
+      // Case 2: a fallthrough source exists and should branch to the
+      // cleanup, but we're not supposed to branch through to the next
+      // cleanup.
+    } else if (hasFallthrough && fallthroughDest) {
+      cgm.errorNYI("cleanup fallthrough destination");
+
+      // Case 3: a fallthrough source exists and should branch to the
+      // cleanup and then through to the next.
+    } else if (hasFallthrough) {
+      // Everything is already set up for this.
+
+      // Case 4: no fallthrough source exists.
+    } else {
+      // FIXME(cir): should we clear insertion point here?
+    }
+
+    // VI. Assorted cleaning.
+
+    // Check whether we can merge NormalEntry into a single predecessor.
+    // This might invalidate (non-IR) pointers to NormalEntry.
+    //
+    // If it did invalidate those pointers, and normalEntry was the same
+    // as NormalExit, go back and patch up the fixups.
+    assert(!cir::MissingFeatures::simplifyCleanupEntry());
+  }
 }
 
 /// Pops cleanup blocks until the given savepoint is reached.
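In popCleanupBlock above, the two interesting shapes are "fallthrough only" (the cleanup is emitted inline) and "fallthrough plus other entries" (everything is threaded through the block made by createNormalEntry). A sketch of source producing each shape, under the assumption that early returns are routed via emitBranchThroughCleanup:

struct Guard { ~Guard(); };

// hasFallthrough && !hasFixups: ~Guard() is emitted directly at scope end.
void fallthroughOnly() { Guard g; }

// A second exit path: both the early return and the fallthrough funnel
// into the shared cleanup entry block before reaching the return block.
void twoPaths(bool c) {
  Guard g;
  if (c)
    return;
}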
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.h b/clang/lib/CIR/CodeGen/CIRGenCleanup.h
index 30f5607..9acf8b1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.h
@@ -20,6 +20,13 @@
 
 namespace clang::CIRGen {
 
+/// The MS C++ ABI needs a pointer to RTTI data plus some flags to describe the
+/// type of a catch handler, so we use this wrapper.
+struct CatchTypeInfo {
+  mlir::TypedAttr rtti;
+  unsigned flags;
+};
+
 /// A protected scope for zero-cost EH handling.
 class EHScope {
   class CommonBitFields {
@@ -29,6 +36,12 @@ class EHScope {
   enum { NumCommonBits = 3 };
 
 protected:
+  class CatchBitFields {
+    friend class EHCatchScope;
+    unsigned : NumCommonBits;
+    unsigned numHandlers : 32 - NumCommonBits;
+  };
+
   class CleanupBitFields {
     friend class EHCleanupScope;
     unsigned : NumCommonBits;
@@ -58,6 +71,7 @@ protected:
 
   union {
     CommonBitFields commonBits;
+    CatchBitFields catchBits;
     CleanupBitFields cleanupBits;
   };
 
@@ -67,11 +81,88 @@ public:
   EHScope(Kind kind) { commonBits.kind = kind; }
 
   Kind getKind() const { return static_cast<Kind>(commonBits.kind); }
+
+  bool mayThrow() const {
+    // Traditional LLVM codegen also checks for `!block->use_empty()`, but
+    // in CIRGen the block content is not important, just used as a way to
+    // signal `hasEHBranches`.
+    assert(!cir::MissingFeatures::ehstackBranches());
+    return false;
+  }
+};
+
+/// A scope which attempts to handle some, possibly all, types of
+/// exceptions.
+///
+/// Objective C \@finally blocks are represented using a cleanup scope
+/// after the catch scope.
+class EHCatchScope : public EHScope {
+  // In effect, we have a flexible array member
+  //   Handler Handlers[0];
+  // But that's only standard in C99, not C++, so we have to do
+  // annoying pointer arithmetic instead.
+
+public:
+  struct Handler {
+    /// A type info value, or null MLIR attribute for a catch-all
+    CatchTypeInfo type;
+
+    /// The catch handler for this type.
+    mlir::Region *region;
+  };
+
+private:
+  friend class EHScopeStack;
+
+  Handler *getHandlers() { return reinterpret_cast<Handler *>(this + 1); }
+
+public:
+  static size_t getSizeForNumHandlers(unsigned n) {
+    return sizeof(EHCatchScope) + n * sizeof(Handler);
+  }
+
+  EHCatchScope(unsigned numHandlers) : EHScope(Catch) {
+    catchBits.numHandlers = numHandlers;
+    assert(catchBits.numHandlers == numHandlers && "NumHandlers overflow?");
+  }
+
+  unsigned getNumHandlers() const { return catchBits.numHandlers; }
+
+  void setHandler(unsigned i, CatchTypeInfo type, mlir::Region *region) {
+    assert(i < getNumHandlers());
+    getHandlers()[i].type = type;
+    getHandlers()[i].region = region;
+  }
+
+  // Clear all handler blocks.
+  // FIXME: it's better to always call clearHandlerBlocks in DTOR and have a
+  // 'takeHandler' or some such function which removes ownership from the
+  // EHCatchScope object if the handlers should live longer than EHCatchScope.
+  void clearHandlerBlocks() {
+    // The blocks are owned by TryOp, nothing to delete.
+  }
+
+  static bool classof(const EHScope *scope) {
+    return scope->getKind() == Catch;
+  }
+};
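The "annoying pointer arithmetic" comment above refers to the trailing-objects idiom; reduced to its essentials it looks like this (standalone sketch, names hypothetical):

#include <cstddef>
#include <new>

struct Handler { void *payload; };

struct CatchScopeModel {
  unsigned numHandlers;
  // The handlers live immediately after the object itself.
  Handler *getHandlers() { return reinterpret_cast<Handler *>(this + 1); }
  static size_t sizeFor(unsigned n) {
    return sizeof(CatchScopeModel) + n * sizeof(Handler);
  }
};

// Usage: carve out one buffer for the object plus its handler array.
//   char *buf = new char[CatchScopeModel::sizeFor(n)];
//   auto *scope = new (buf) CatchScopeModel{n};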
 
 /// A cleanup scope which generates the cleanup blocks lazily.
 class alignas(EHScopeStack::ScopeStackAlignment) EHCleanupScope
     : public EHScope {
+  /// The nearest normal cleanup scope enclosing this one.
+  EHScopeStack::stable_iterator enclosingNormal;
+
+  /// The dual entry/exit block along the normal edge. This is lazily
+  /// created if needed before the cleanup is popped.
+  mlir::Block *normalBlock = nullptr;
+
+  /// The number of fixups required by enclosing scopes (not including
+  /// this one). If this is the top cleanup scope, all the fixups
+  /// from this index onwards belong to this scope.
+  unsigned fixupDepth = 0;
+
 public:
   /// Gets the size required for a lazy cleanup scope with the given
   /// cleanup-data requirements.
@@ -83,7 +174,10 @@ public:
     return sizeof(EHCleanupScope) + cleanupBits.cleanupSize;
   }
 
-  EHCleanupScope(unsigned cleanupSize) : EHScope(EHScope::Cleanup) {
+  EHCleanupScope(unsigned cleanupSize, unsigned fixupDepth,
+                 EHScopeStack::stable_iterator enclosingNormal)
+      : EHScope(EHScope::Cleanup), enclosingNormal(enclosingNormal),
+        fixupDepth(fixupDepth) {
     // TODO(cir): When exception handling is upstreamed, isNormalCleanup and
     // isEHCleanup will be arguments to the constructor.
     cleanupBits.isNormalCleanup = true;
@@ -101,11 +195,19 @@ public:
   // Objects of EHCleanupScope are not destructed. Use destroy().
   ~EHCleanupScope() = delete;
 
+  mlir::Block *getNormalBlock() const { return normalBlock; }
+  void setNormalBlock(mlir::Block *bb) { normalBlock = bb; }
+
   bool isNormalCleanup() const { return cleanupBits.isNormalCleanup; }
 
   bool isActive() const { return cleanupBits.isActive; }
   void setActive(bool isActive) { cleanupBits.isActive = isActive; }
 
+  unsigned getFixupDepth() const { return fixupDepth; }
+  EHScopeStack::stable_iterator getEnclosingNormalCleanup() const {
+    return enclosingNormal;
+  }
+
   size_t getCleanupSize() const { return cleanupBits.cleanupSize; }
   void *getCleanupBuffer() { return this + 1; }
 
@@ -147,5 +249,13 @@ EHScopeStack::find(stable_iterator savePoint) const {
   return iterator(endOfBuffer - savePoint.size);
 }
 
+inline void EHScopeStack::popCatch() {
+  assert(!empty() && "popping exception stack when not empty");
+
+  EHCatchScope &scope = llvm::cast<EHCatchScope>(*begin());
+  assert(!cir::MissingFeatures::innermostEHScope());
+  deallocate(EHCatchScope::getSizeForNumHandlers(scope.getNumHandlers()));
+}
+
 } // namespace clang::CIRGen
 
 #endif // CLANG_LIB_CIR_CODEGEN_CIRGENCLEANUP_H
diff --git a/clang/lib/CIR/CodeGen/CIRGenException.cpp b/clang/lib/CIR/CodeGen/CIRGenException.cpp
index f9ff37b..717a3e0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenException.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenException.cpp
@@ -69,6 +69,153 @@ mlir::LogicalResult CIRGenFunction::emitCXXTryStmt(const CXXTryStmt &s) {
   if (s.getTryBlock()->body_empty())
     return mlir::LogicalResult::success();
 
-  cgm.errorNYI("exitCXXTryStmt: CXXTryStmt with non-empty body");
-  return mlir::LogicalResult::success();
+  mlir::Location loc = getLoc(s.getSourceRange());
+  // Create a scope to hold try local storage for catch params.
+
+  mlir::OpBuilder::InsertPoint scopeIP;
+  cir::ScopeOp::create(
+      builder, loc,
+      /*scopeBuilder=*/[&](mlir::OpBuilder &b, mlir::Location loc) {
+        scopeIP = builder.saveInsertionPoint();
+      });
+
+  mlir::OpBuilder::InsertionGuard guard(builder);
+  builder.restoreInsertionPoint(scopeIP);
+  mlir::LogicalResult result = emitCXXTryStmtUnderScope(s);
+  cir::YieldOp::create(builder, loc);
+  return result;
+}
+
+mlir::LogicalResult
+CIRGenFunction::emitCXXTryStmtUnderScope(const CXXTryStmt &s) {
+  const llvm::Triple &t = getTarget().getTriple();
+  // If we encounter a try statement in an OpenMP target region offloaded to
+  // a GPU, we treat it as a basic block.
+  const bool isTargetDevice =
+      (cgm.getLangOpts().OpenMPIsTargetDevice && (t.isNVPTX() || t.isAMDGCN()));
+  if (isTargetDevice) {
+    cgm.errorNYI(
+        "emitCXXTryStmtUnderScope: OpenMP target region offloaded to GPU");
+    return mlir::success();
+  }
+
+  unsigned numHandlers = s.getNumHandlers();
+  mlir::Location tryLoc = getLoc(s.getBeginLoc());
+  mlir::OpBuilder::InsertPoint beginInsertTryBody;
+
+  bool hasCatchAll = false;
+  for (unsigned i = 0; i != numHandlers; ++i) {
+    hasCatchAll |= s.getHandler(i)->getExceptionDecl() == nullptr;
+    if (hasCatchAll)
+      break;
+  }
+
+  // Create the scope to represent only the C/C++ `try {}` part. However,
+  // don't populate right away. Create regions for the catch handlers,
+  // but don't emit the handler bodies yet. For now, only make sure the
+  // scope returns the exception information.
+  auto tryOp = cir::TryOp::create(
+      builder, tryLoc,
+      /*tryBuilder=*/
+      [&](mlir::OpBuilder &b, mlir::Location loc) {
+        beginInsertTryBody = builder.saveInsertionPoint();
+      },
+      /*handlersBuilder=*/
+      [&](mlir::OpBuilder &b, mlir::Location loc,
+          mlir::OperationState &result) {
+        mlir::OpBuilder::InsertionGuard guard(b);
+
+        // We create an extra region for an unwind catch handler in case the
+        // catch-all handler doesn't exist.
+        unsigned numRegionsToCreate =
+            hasCatchAll ? numHandlers : numHandlers + 1;
+
+        for (unsigned i = 0; i != numRegionsToCreate; ++i) {
+          mlir::Region *region = result.addRegion();
+          builder.createBlock(region);
+        }
+      });
+
+  // Finally emit the body for try/catch.
+  {
+    mlir::Location loc = tryOp.getLoc();
+    mlir::OpBuilder::InsertionGuard guard(builder);
+    builder.restoreInsertionPoint(beginInsertTryBody);
+    CIRGenFunction::LexicalScope tryScope{*this, loc,
+                                          builder.getInsertionBlock()};
+
+    tryScope.setAsTry(tryOp);
+
+    // Attach the basic blocks for the catch regions.
+    enterCXXTryStmt(s, tryOp);
+
+    // Emit the body for the `try {}` part.
+    {
+      mlir::OpBuilder::InsertionGuard guard(builder);
+      CIRGenFunction::LexicalScope tryBodyScope{*this, loc,
+                                                builder.getInsertionBlock()};
+      if (emitStmt(s.getTryBlock(), /*useCurrentScope=*/true).failed())
+        return mlir::failure();
+    }
+
+    // Emit catch clauses.
+    exitCXXTryStmt(s);
+  }
+
+  return mlir::success();
+}
+
+void CIRGenFunction::enterCXXTryStmt(const CXXTryStmt &s, cir::TryOp tryOp,
+                                     bool isFnTryBlock) {
+  unsigned numHandlers = s.getNumHandlers();
+  EHCatchScope *catchScope = ehStack.pushCatch(numHandlers);
+  for (unsigned i = 0; i != numHandlers; ++i) {
+    const CXXCatchStmt *catchStmt = s.getHandler(i);
+    if (catchStmt->getExceptionDecl()) {
+      cgm.errorNYI("enterCXXTryStmt: CatchStmt with ExceptionDecl");
+      return;
+    }
+
+    // No exception decl indicates '...', a catch-all.
+    mlir::Region *handler = &tryOp.getHandlerRegions()[i];
+    catchScope->setHandler(i, cgm.getCXXABI().getCatchAllTypeInfo(), handler);
+
+    // Under async exceptions, catch(...) needs to catch HW exceptions too.
+    // Mark scope with SehTryBegin as a SEH __try scope.
+    if (getLangOpts().EHAsynch) {
+      cgm.errorNYI("enterCXXTryStmt: EHAsynch");
+      return;
+    }
+  }
+}
+
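enterCXXTryStmt above only handles catch-all clauses for now; a handler with an exception declaration still hits errorNYI. The supported shape is (illustrative only):

void mayThrow();

int guarded() {
  try {
    mayThrow();
  } catch (...) { // no ExceptionDecl, so it is registered through
    return -1;    // getCatchAllTypeInfo(): null RTTI attribute, flags 0
  }
  return 0;
}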
+void CIRGenFunction::exitCXXTryStmt(const CXXTryStmt &s, bool isFnTryBlock) {
+  unsigned numHandlers = s.getNumHandlers();
+  EHCatchScope &catchScope = cast<EHCatchScope>(*ehStack.begin());
+  assert(catchScope.getNumHandlers() == numHandlers);
+  cir::TryOp tryOp = curLexScope->getTry();
+
+  // If the catch was not required, bail out now.
+  if (!catchScope.mayThrow()) {
+    catchScope.clearHandlerBlocks();
+    ehStack.popCatch();
+
+    // Drop all basic blocks from all catch regions.
+    SmallVector<mlir::Block *> eraseBlocks;
+    for (mlir::Region &handlerRegion : tryOp.getHandlerRegions()) {
+      if (handlerRegion.empty())
+        continue;
+
+      for (mlir::Block &b : handlerRegion.getBlocks())
+        eraseBlocks.push_back(&b);
+    }
+
+    for (mlir::Block *b : eraseBlocks)
+      b->erase();
+
+    tryOp.setHandlerTypesAttr({});
+    return;
+  }
+
+  cgm.errorNYI("exitCXXTryStmt: Required catch");
 }
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 9732c9c..52021fc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1675,7 +1675,25 @@ CIRGenCallee CIRGenFunction::emitDirectCallee(const GlobalDecl &gd) {
     // name to make it clear it's not the actual builtin.
     auto fn = cast<cir::FuncOp>(curFn);
     if (fn.getName() != fdInlineName && onlyHasInlineBuiltinDeclaration(fd)) {
-      cgm.errorNYI("Inline only builtin function calls");
+      cir::FuncOp clone =
+          mlir::cast_or_null<cir::FuncOp>(cgm.getGlobalValue(fdInlineName));
+
+      if (!clone) {
+        // Create a forward declaration - the body will be generated in
+        // generateCode when the function definition is processed.
+        cir::FuncOp calleeFunc = emitFunctionDeclPointer(cgm, gd);
+        mlir::OpBuilder::InsertionGuard guard(builder);
+        builder.setInsertionPointToStart(cgm.getModule().getBody());
+
+        clone = builder.create<cir::FuncOp>(calleeFunc.getLoc(), fdInlineName,
+                                            calleeFunc.getFunctionType());
+        clone.setLinkageAttr(cir::GlobalLinkageKindAttr::get(
+            &cgm.getMLIRContext(), cir::GlobalLinkageKind::InternalLinkage));
+        clone.setSymVisibility("private");
+        clone.setInlineKindAttr(cir::InlineAttr::get(
+            &cgm.getMLIRContext(), cir::InlineKind::AlwaysInline));
+      }
+      return CIRGenCallee::forDirect(clone, gd);
     }
 
     // Replaceable builtins provide their own implementation of a builtin. If we
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
index 568cbdb..d6d226b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
@@ -280,6 +280,7 @@ public:
   void VisitUnaryDeref(UnaryOperator *e) { emitAggLoadOfLValue(e); }
   void VisitStringLiteral(StringLiteral *e) { emitAggLoadOfLValue(e); }
   void VisitCompoundLiteralExpr(CompoundLiteralExpr *e);
+
   void VisitPredefinedExpr(const PredefinedExpr *e) {
     cgf.cgm.errorNYI(e->getSourceRange(),
                      "AggExprEmitter: VisitPredefinedExpr");
@@ -670,7 +671,7 @@ void AggExprEmitter::emitNullInitializationToLValue(mlir::Location loc,
     return;
   }
 
-  cgf.cgm.errorNYI("emitStoreThroughBitfieldLValue");
+  cgf.emitStoreThroughBitfieldLValue(RValue::get(null), lv);
   return;
 }
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index b1e9e76..fe9e210 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -306,6 +306,7 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
         mlir::cast<cir::IntAttr>(constNumElements).getValue();
     unsigned numElementsWidth = count.getBitWidth();
+    bool hasAnyOverflow = false;
 
     // The equivalent code in CodeGen/CGExprCXX.cpp handles these cases as
     // overflow, but that should never happen. The size argument is implicitly
@@ -336,11 +337,22 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
     // Add in the cookie, and check whether it's overflowed.
     if (cookieSize != 0) {
-      cgf.cgm.errorNYI(e->getSourceRange(),
-                       "emitCXXNewAllocSize: array cookie");
+      // Save the current size without a cookie. This shouldn't be
+      // used if there was overflow.
+      sizeWithoutCookie = cgf.getBuilder().getConstInt(
+          loc, allocationSize.zextOrTrunc(sizeWidth));
+
+      allocationSize = allocationSize.uadd_ov(cookieSize, overflow);
+      hasAnyOverflow |= overflow;
     }
 
-    size = cgf.getBuilder().getConstInt(loc, allocationSize);
+    // On overflow, produce a -1 so operator new will fail.
+    if (hasAnyOverflow) {
+      size =
+          cgf.getBuilder().getConstInt(loc, llvm::APInt::getAllOnes(sizeWidth));
+    } else {
+      size = cgf.getBuilder().getConstInt(loc, allocationSize);
+    }
   } else {
     // TODO: Handle the variable size case
     cgf.cgm.errorNYI(e->getSourceRange(),
@@ -390,7 +402,50 @@ void CIRGenFunction::emitNewArrayInitializer(
   if (!e->hasInitializer())
     return;
 
-  cgm.errorNYI(e->getSourceRange(), "emitNewArrayInitializer");
+  unsigned initListElements = 0;
+
+  const Expr *init = e->getInitializer();
+  const InitListExpr *ile = dyn_cast<InitListExpr>(init);
+  if (ile) {
+    cgm.errorNYI(ile->getSourceRange(), "emitNewArrayInitializer: init list");
+    return;
+  }
+
+  // If all elements have already been initialized, skip any further
+  // initialization.
+  auto constOp = mlir::dyn_cast<cir::ConstantOp>(numElements.getDefiningOp());
+  if (constOp) {
+    auto constIntAttr = mlir::dyn_cast<cir::IntAttr>(constOp.getValue());
+    // Just skip out if the constant count is zero.
+    if (constIntAttr && constIntAttr.getUInt() <= initListElements)
+      return;
+  }
+
+  assert(init && "have trailing elements to initialize but no initializer");
+
+  // If this is a constructor call, try to optimize it out, and failing that
+  // emit a single loop to initialize all remaining elements.
+  if (const CXXConstructExpr *cce = dyn_cast<CXXConstructExpr>(init)) {
+    CXXConstructorDecl *ctor = cce->getConstructor();
+    if (ctor->isTrivial()) {
+      // If new expression did not specify value-initialization, then there
+      // is no initialization.
+      if (!cce->requiresZeroInitialization())
+        return;
+
+      cgm.errorNYI(cce->getSourceRange(),
+                   "emitNewArrayInitializer: trivial ctor zero-init");
+      return;
+    }
+
+    cgm.errorNYI(cce->getSourceRange(),
+                 "emitNewArrayInitializer: ctor initializer");
+    return;
+  }
+
+  cgm.errorNYI(init->getSourceRange(),
+               "emitNewArrayInitializer: unsupported initializer");
+  return;
 }
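The allocation-size logic above relies on APInt's overflow-reporting arithmetic. In isolation the computation looks roughly like this (standalone sketch, assuming a 64-bit size_t; newArraySize is a hypothetical helper):

#include "llvm/ADT/APInt.h"
#include <cstdint>

// Returns the byte count passed to operator new[], or all-ones on overflow,
// mirroring the "produce a -1 so operator new will fail" behavior above.
uint64_t newArraySize(uint64_t numElements, uint64_t elemSize,
                      uint64_t cookieSize) {
  bool mulOverflow = false, addOverflow = false;
  llvm::APInt size(64, numElements);
  size = size.umul_ov(llvm::APInt(64, elemSize), mulOverflow);
  size = size.uadd_ov(llvm::APInt(64, cookieSize), addOverflow);
  if (mulOverflow || addOverflow)
    return ~0ULL; // forces the allocator to fail
  return size.getZExtValue();
}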
 
 static void emitNewInitializer(CIRGenFunction &cgf, const CXXNewExpr *e,
@@ -586,9 +641,6 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
   // If there is a brace-initializer, cannot allocate fewer elements than inits.
   unsigned minElements = 0;
-  if (e->isArray() && e->hasInitializer()) {
-    cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: array initializer");
-  }
 
   mlir::Value numElements = nullptr;
   mlir::Value allocSizeWithoutCookie = nullptr;
@@ -667,8 +719,11 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
       !e->getOperatorDelete()->isReservedGlobalPlacementOperator())
     cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: operator delete");
 
-  if (allocSize != allocSizeWithoutCookie)
-    cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: array with cookies");
+  if (allocSize != allocSizeWithoutCookie) {
+    assert(e->isArray());
+    allocation = cgm.getCXXABI().initializeArrayCookie(
+        *this, allocation, numElements, e, allocType);
+  }
 
   mlir::Type elementTy;
   if (e->isArray()) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 138082b..33eb748 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -2041,8 +2041,9 @@ mlir::Value ScalarExprEmitter::VisitMemberExpr(MemberExpr *e) {
   assert(!cir::MissingFeatures::tryEmitAsConstant());
   Expr::EvalResult result;
   if (e->EvaluateAsInt(result, cgf.getContext(), Expr::SE_AllowSideEffects)) {
-    cgf.cgm.errorNYI(e->getSourceRange(), "Constant interger member expr");
-    // Fall through to emit this as a non-constant access.
+    llvm::APSInt value = result.Val.getInt();
+    cgf.emitIgnoredExpr(e->getBase());
+    return builder.getConstInt(cgf.getLoc(e->getExprLoc()), value);
   }
   return emitLoadOfLValue(e);
 }
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 25a46df..d3c0d9f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -551,6 +551,49 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
   const auto funcDecl = cast<FunctionDecl>(gd.getDecl());
   curGD = gd;
 
+  if (funcDecl->isInlineBuiltinDeclaration()) {
+    // When generating code for a builtin with an inline declaration, use a
+    // mangled name to hold the actual body, while keeping an external
+    // declaration in case the function pointer is referenced somewhere.
+    std::string fdInlineName = (cgm.getMangledName(funcDecl) + ".inline").str();
+    cir::FuncOp clone =
+        mlir::cast_or_null<cir::FuncOp>(cgm.getGlobalValue(fdInlineName));
+    if (!clone) {
+      mlir::OpBuilder::InsertionGuard guard(builder);
+      builder.setInsertionPoint(fn);
+      clone = builder.create<cir::FuncOp>(fn.getLoc(), fdInlineName,
+                                          fn.getFunctionType());
+      clone.setLinkage(cir::GlobalLinkageKind::InternalLinkage);
+      clone.setSymVisibility("private");
+      clone.setInlineKind(cir::InlineKind::AlwaysInline);
+    }
+    fn.setLinkage(cir::GlobalLinkageKind::ExternalLinkage);
+    fn.setSymVisibility("private");
+    fn = clone;
+  } else {
+    // Detect the unusual situation where an inline version is shadowed by a
+    // non-inline version. In that case we should pick the external one
+    // everywhere. That's GCC behavior too.
+    for (const FunctionDecl *pd = funcDecl->getPreviousDecl(); pd;
+         pd = pd->getPreviousDecl()) {
+      if (LLVM_UNLIKELY(pd->isInlineBuiltinDeclaration())) {
+        std::string inlineName = funcDecl->getName().str() + ".inline";
+        if (auto inlineFn = mlir::cast_or_null<cir::FuncOp>(
+                cgm.getGlobalValue(inlineName))) {
+          // Replace all uses of the .inline function with the regular function.
+          // FIXME: This performs a linear walk over the module. Introduce some
+          // caching here.
+          if (inlineFn
+                  .replaceAllSymbolUses(fn.getSymNameAttr(), cgm.getModule())
+                  .failed())
+            llvm_unreachable("Failed to replace inline builtin symbol uses");
+          inlineFn.erase();
+        }
+        break;
+      }
+    }
+  }
+
   SourceLocation loc = funcDecl->getLocation();
   Stmt *body = funcDecl->getBody();
   SourceRange bodyRange =
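The ".inline" machinery above implements the GNU inline-builtin pattern, where a translation unit supplies its own always-inline body for a library builtin. Roughly the source it exists for looks like this (hypothetical example; the exact attribute combination required is whatever isInlineBuiltinDeclaration checks):

#include <stddef.h>

// The body below is emitted under the mangled name "memcpy.inline" with
// internal linkage and always_inline, while plain "memcpy" remains an
// external declaration in case the function's address is taken somewhere.
extern "C" inline __attribute__((always_inline, gnu_inline)) void *
memcpy(void *dst, const void *src, size_t n) {
  char *d = static_cast<char *>(dst);
  const char *s = static_cast<const char *>(src);
  while (n--)
    *d++ = *s++;
  return dst;
}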
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 5a71126..e3b9b6a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -60,11 +60,44 @@ private:
   /// is where the next operations will be introduced.
   CIRGenBuilderTy &builder;
 
+  /// A jump destination is an abstract label, branching to which may
+  /// require a jump out through normal cleanups.
+  struct JumpDest {
+    JumpDest() = default;
+    JumpDest(mlir::Block *block, EHScopeStack::stable_iterator depth = {},
+             unsigned index = 0)
+        : block(block), scopeDepth(depth), index(index) {}
+
+    bool isValid() const { return block != nullptr; }
+    mlir::Block *getBlock() const { return block; }
+    EHScopeStack::stable_iterator getScopeDepth() const { return scopeDepth; }
+    unsigned getDestIndex() const { return index; }
+
+    // This should be used cautiously.
+    void setScopeDepth(EHScopeStack::stable_iterator depth) {
+      scopeDepth = depth;
+    }
+
+  private:
+    mlir::Block *block = nullptr;
+    EHScopeStack::stable_iterator scopeDepth;
+    unsigned index = 0;
+  };
+
 public:
   /// The GlobalDecl for the current function being compiled or the global
   /// variable currently being initialized.
   clang::GlobalDecl curGD;
 
+  /// Unified return block.
+  /// In CIR this is a function because each scope might have
+  /// its associated return block.
+  JumpDest returnBlock(mlir::Block *retBlock) {
+    return getJumpDestInCurrentScope(retBlock);
+  }
+
+  unsigned nextCleanupDestIndex = 1;
+
   /// The compiler-generated variable that holds the return value.
   std::optional<mlir::Value> fnRetAlloca;
 
@@ -574,6 +607,16 @@ public:
     }
   };
 
+  /// The given basic block lies in the current EH scope, but may be a
+  /// target of a potentially scope-crossing jump; get a stable handle
+  /// to which we can perform this jump later.
+  /// CIRGen: this mostly tracks state for figuring out the proper scope
+  /// information, no actual branches are emitted.
+  JumpDest getJumpDestInCurrentScope(mlir::Block *target) {
+    return JumpDest(target, ehStack.getInnermostNormalCleanup(),
+                    nextCleanupDestIndex++);
+  }
+
   /// Perform the usual unary conversions on the specified expression and
   /// compare the result against zero, returning an Int1Ty value.
   mlir::Value evaluateExprAsBool(const clang::Expr *e);
@@ -954,6 +997,9 @@ public:
 
     LexicalScope *parentScope = nullptr;
 
+    // Holds the actual value for ScopeKind::Try
+    cir::TryOp tryOp = nullptr;
+
     // Only Regular is used at the moment. Support for other kinds will be
     // added as the relevant statements/expressions are upstreamed.
    enum Kind {
@@ -1013,6 +1059,10 @@ public:
     void setAsGlobalInit() { scopeKind = Kind::GlobalInit; }
     void setAsSwitch() { scopeKind = Kind::Switch; }
     void setAsTernary() { scopeKind = Kind::Ternary; }
+    void setAsTry(cir::TryOp op) {
+      scopeKind = Kind::Try;
+      tryOp = op;
+    }
 
     // Lazy create cleanup block or return what's available.
     mlir::Block *getOrCreateCleanupBlock(mlir::OpBuilder &builder) {
@@ -1022,6 +1072,11 @@ public:
       return cleanupBlock;
     }
 
+    cir::TryOp getTry() {
+      assert(isTry());
+      return tryOp;
+    }
+
     mlir::Block *getCleanupBlock(mlir::OpBuilder &builder) {
       return cleanupBlock;
     }
@@ -1209,6 +1264,8 @@ public:
 
   LValue emitBinaryOperatorLValue(const BinaryOperator *e);
 
+  cir::BrOp emitBranchThroughCleanup(mlir::Location loc, JumpDest dest);
+
   mlir::LogicalResult emitBreakStmt(const clang::BreakStmt &s);
 
   RValue emitBuiltinExpr(const clang::GlobalDecl &gd, unsigned builtinID,
@@ -1348,6 +1405,13 @@ public:
 
   mlir::LogicalResult emitCXXTryStmt(const clang::CXXTryStmt &s);
 
+  mlir::LogicalResult emitCXXTryStmtUnderScope(const clang::CXXTryStmt &s);
+
+  void enterCXXTryStmt(const CXXTryStmt &s, cir::TryOp tryOp,
+                       bool isFnTryBlock = false);
+
+  void exitCXXTryStmt(const CXXTryStmt &s, bool isFnTryBlock = false);
+
   void emitCtorPrologue(const clang::CXXConstructorDecl *ctor,
                         clang::CXXCtorType ctorType, FunctionArgList &args);
 
@@ -1595,6 +1659,10 @@ public:
                                        bool buildingTopLevelCase);
   mlir::LogicalResult emitSwitchStmt(const clang::SwitchStmt &s);
 
+  mlir::Value emitTargetBuiltinExpr(unsigned builtinID,
+                                    const clang::CallExpr *e,
+                                    ReturnValueSlot &returnValue);
+
   /// Given a value and its clang type, returns the value casted to its memory
   /// representation.
   /// Note: CIR defers most of the special casting to the final lowering passes
@@ -1633,6 +1701,8 @@ public:
 
   mlir::LogicalResult emitWhileStmt(const clang::WhileStmt &s);
 
+  mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *e);
+
   /// Given an assignment `*lhs = rhs`, emit a test that checks if \p rhs is
   /// nonnull, if \p lhs is marked _Nonnull.
   void emitNullabilityCheck(LValue lhs, mlir::Value rhs,
diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
index c184d4a..e620310 100644
--- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
@@ -135,8 +135,14 @@ public:
                             cir::PointerType destCIRTy, bool isRefCast,
                             Address src) override;
 
-  /**************************** RTTI Uniqueness ******************************/
+  Address initializeArrayCookie(CIRGenFunction &cgf, Address newPtr,
+                                mlir::Value numElements, const CXXNewExpr *e,
+                                QualType elementType) override;
+
 protected:
+  CharUnits getArrayCookieSizeImpl(QualType elementType) override;
+
+  /**************************** RTTI Uniqueness ******************************/
   /// Returns true if the ABI requires RTTI type_info objects to be unique
   /// across a program.
   virtual bool shouldRTTIBeUnique() const { return true; }
@@ -2003,3 +2009,70 @@ mlir::Value CIRGenItaniumCXXABI::emitDynamicCast(CIRGenFunction &cgf,
   return cgf.getBuilder().createDynCast(loc, src.getPointer(), destCIRTy,
                                         isRefCast, castInfo);
 }
+
+/************************** Array allocation cookies **************************/
+
+CharUnits CIRGenItaniumCXXABI::getArrayCookieSizeImpl(QualType elementType) {
+  // The array cookie is a size_t; pad that up to the element alignment.
+  // The cookie is actually right-justified in that space.
+  return std::max(
+      cgm.getSizeSize(),
+      cgm.getASTContext().getPreferredTypeAlignInChars(elementType));
+}
+
+Address CIRGenItaniumCXXABI::initializeArrayCookie(CIRGenFunction &cgf,
+                                                   Address newPtr,
+                                                   mlir::Value numElements,
+                                                   const CXXNewExpr *e,
+                                                   QualType elementType) {
+  assert(requiresArrayCookie(e));
+
+  // TODO: When sanitizer support is implemented, we'll need to
+  // get the address space from `newPtr`.
+  assert(!cir::MissingFeatures::addressSpace());
+  assert(!cir::MissingFeatures::sanitizers());
+
+  ASTContext &ctx = cgm.getASTContext();
+  CharUnits sizeSize = cgf.getSizeSize();
+  mlir::Location loc = cgf.getLoc(e->getSourceRange());
+
+  // The size of the cookie.
+  CharUnits cookieSize =
+      std::max(sizeSize, ctx.getPreferredTypeAlignInChars(elementType));
+  assert(cookieSize == getArrayCookieSizeImpl(elementType));
+
+  cir::PointerType u8PtrTy = cgf.getBuilder().getUInt8PtrTy();
+  mlir::Value baseBytePtr =
+      cgf.getBuilder().createPtrBitcast(newPtr.getPointer(), u8PtrTy);
+
+  // Compute an offset to the cookie.
+  CharUnits cookieOffset = cookieSize - sizeSize;
+  mlir::Value cookiePtrValue = baseBytePtr;
+  if (!cookieOffset.isZero()) {
+    mlir::Value offsetOp = cgf.getBuilder().getSignedInt(
+        loc, cookieOffset.getQuantity(), /*width=*/32);
+    cookiePtrValue =
+        cgf.getBuilder().createPtrStride(loc, cookiePtrValue, offsetOp);
+  }
+
+  CharUnits baseAlignment = newPtr.getAlignment();
+  CharUnits cookiePtrAlignment = baseAlignment.alignmentAtOffset(cookieOffset);
+  Address cookiePtr(cookiePtrValue, u8PtrTy, cookiePtrAlignment);
+
+  // Write the number of elements into the appropriate slot.
+  Address numElementsPtr =
+      cookiePtr.withElementType(cgf.getBuilder(), cgf.SizeTy);
+  cgf.getBuilder().createStore(loc, numElements, numElementsPtr);
+
+  // Finally, compute a pointer to the actual data buffer by skipping
+  // over the cookie completely.
+  mlir::Value dataOffset =
+      cgf.getBuilder().getSignedInt(loc, cookieSize.getQuantity(),
+                                    /*width=*/32);
+  mlir::Value dataPtr =
+      cgf.getBuilder().createPtrStride(loc, baseBytePtr, dataOffset);
+  mlir::Value finalPtr =
+      cgf.getBuilder().createPtrBitcast(dataPtr, newPtr.getElementType());
+  CharUnits finalAlignment = baseAlignment.alignmentAtOffset(cookieSize);
+  return Address(finalPtr, newPtr.getElementType(), finalAlignment);
+}
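For reference, the memory layout produced by initializeArrayCookie above can be modeled outside CIRGen as follows (standalone sketch; writeArrayCookie is hypothetical and assumes cookieSize was computed as in getArrayCookieSizeImpl):

#include <cstddef>
#include <cstring>

template <typename T>
T *writeArrayCookie(void *allocation, size_t n, size_t cookieSize) {
  char *base = static_cast<char *>(allocation);
  // The element count is right-justified: it occupies the last
  // sizeof(size_t) bytes of the cookie.
  std::memcpy(base + cookieSize - sizeof(size_t), &n, sizeof(n));
  // The pointer the new-expression yields skips the whole cookie.
  return reinterpret_cast<T *>(base + cookieSize);
}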
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 127f763..6b29373 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -102,7 +102,7 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
   // TODO(CIR): Should be updated once TypeSizeInfoAttr is upstreamed
   const unsigned sizeTypeSize =
       astContext.getTypeSize(astContext.getSignedSizeType());
-  SizeAlignInBytes = astContext.toCharUnitsFromBits(sizeTypeSize).getQuantity();
+  SizeSizeInBytes = astContext.toCharUnitsFromBits(sizeTypeSize).getQuantity();
   // In CIRGenTypeCache, UIntPtrTy and SizeType are fields of the same union
   UIntPtrTy =
       cir::IntType::get(&getMLIRContext(), sizeTypeSize, /*isSigned=*/false);
@@ -1917,6 +1917,17 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl,
     const Decl *decl = globalDecl.getDecl();
     func.setGlobalVisibilityAttr(getGlobalVisibilityAttrFromDecl(decl));
   }
+
+  // If we plan on emitting this inline builtin, we can't treat it as a builtin.
+  const auto *fd = cast<FunctionDecl>(globalDecl.getDecl());
+  if (fd->isInlineBuiltinDeclaration()) {
+    const FunctionDecl *fdBody;
+    bool hasBody = fd->hasBody(fdBody);
+    (void)hasBody;
+    assert(hasBody && "Inline builtin declarations should always have an "
+                      "available body!");
+    assert(!cir::MissingFeatures::attributeNoBuiltin());
+  }
 }
 
 void CIRGenModule::setCIRFunctionAttributesForDefinition(
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index ad8c4d0..f486c46 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -446,54 +446,89 @@ mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) {
   mlir::Location loc = getLoc(s.getSourceRange());
   const Expr *rv = s.getRetValue();
 
-  if (getContext().getLangOpts().ElideConstructors && s.getNRVOCandidate() &&
-      s.getNRVOCandidate()->isNRVOVariable()) {
-    assert(!cir::MissingFeatures::openMP());
-    assert(!cir::MissingFeatures::nrvo());
-  } else if (!rv) {
-    // No return expression. Do nothing.
-  } else if (rv->getType()->isVoidType()) {
-    // Make sure not to return anything, but evaluate the expression
-    // for side effects.
-    if (rv) {
-      emitAnyExpr(rv);
+  RunCleanupsScope cleanupScope(*this);
+  bool createNewScope = false;
+  if (const auto *ewc = dyn_cast_or_null<ExprWithCleanups>(rv)) {
+    rv = ewc->getSubExpr();
+    createNewScope = true;
+  }
+
+  auto handleReturnVal = [&]() {
+    if (getContext().getLangOpts().ElideConstructors && s.getNRVOCandidate() &&
+        s.getNRVOCandidate()->isNRVOVariable()) {
+      assert(!cir::MissingFeatures::openMP());
+      assert(!cir::MissingFeatures::nrvo());
+    } else if (!rv) {
+      // No return expression. Do nothing.
+    } else if (rv->getType()->isVoidType()) {
+      // Make sure not to return anything, but evaluate the expression
+      // for side effects.
+      if (rv) {
+        emitAnyExpr(rv);
+      }
+    } else if (cast<FunctionDecl>(curGD.getDecl())
+                   ->getReturnType()
+                   ->isReferenceType()) {
+      // If this function returns a reference, take the address of the
+      // expression rather than the value.
+      RValue result = emitReferenceBindingToExpr(rv);
+      builder.CIRBaseBuilderTy::createStore(loc, result.getValue(),
+                                            *fnRetAlloca);
+    } else {
+      mlir::Value value = nullptr;
+      switch (CIRGenFunction::getEvaluationKind(rv->getType())) {
+      case cir::TEK_Scalar:
+        value = emitScalarExpr(rv);
+        if (value) { // Change this to an assert once emitScalarExpr is complete
+          builder.CIRBaseBuilderTy::createStore(loc, value, *fnRetAlloca);
+        }
+        break;
+      case cir::TEK_Complex:
+        emitComplexExprIntoLValue(rv,
+                                  makeAddrLValue(returnValue, rv->getType()),
+                                  /*isInit=*/true);
+        break;
+      case cir::TEK_Aggregate:
+        assert(!cir::MissingFeatures::aggValueSlotGC());
+        emitAggExpr(rv, AggValueSlot::forAddr(returnValue, Qualifiers(),
+                                              AggValueSlot::IsDestructed,
+                                              AggValueSlot::IsNotAliased,
+                                              getOverlapForReturnValue()));
+        break;
+      }
     }
-  } else if (cast<FunctionDecl>(curGD.getDecl())
-                 ->getReturnType()
-                 ->isReferenceType()) {
-    // If this function returns a reference, take the address of the
-    // expression rather than the value.
-    RValue result = emitReferenceBindingToExpr(rv);
-    builder.CIRBaseBuilderTy::createStore(loc, result.getValue(), *fnRetAlloca);
+  };
+
+  if (!createNewScope) {
+    handleReturnVal();
   } else {
-    mlir::Value value = nullptr;
-    switch (CIRGenFunction::getEvaluationKind(rv->getType())) {
-    case cir::TEK_Scalar:
-      value = emitScalarExpr(rv);
-      if (value) { // Change this to an assert once emitScalarExpr is complete
-        builder.CIRBaseBuilderTy::createStore(loc, value, *fnRetAlloca);
-      }
-      break;
-    case cir::TEK_Complex:
-      emitComplexExprIntoLValue(rv, makeAddrLValue(returnValue, rv->getType()),
-                                /*isInit=*/true);
-      break;
-    case cir::TEK_Aggregate:
-      assert(!cir::MissingFeatures::aggValueSlotGC());
-      emitAggExpr(rv, AggValueSlot::forAddr(returnValue, Qualifiers(),
-                                            AggValueSlot::IsDestructed,
-                                            AggValueSlot::IsNotAliased,
-                                            getOverlapForReturnValue()));
-      break;
-    }
+    mlir::Location scopeLoc =
+        getLoc(rv ? rv->getSourceRange() : s.getSourceRange());
+    // First create cir.scope and later emit its body. Otherwise all CIRGen
+    // dispatched by `handleReturnVal()` might need to manipulate blocks and
+    // look into parents, which are all unlinked.
+    mlir::OpBuilder::InsertPoint scopeBody;
+    cir::ScopeOp::create(builder, scopeLoc, /*scopeBuilder=*/
+                         [&](mlir::OpBuilder &b, mlir::Location loc) {
+                           scopeBody = b.saveInsertionPoint();
+                         });
+    {
+      mlir::OpBuilder::InsertionGuard guard(builder);
+      builder.restoreInsertionPoint(scopeBody);
+      CIRGenFunction::LexicalScope lexScope{*this, scopeLoc,
                                            builder.getInsertionBlock()};
+      handleReturnVal();
+    }
   }
 
+  cleanupScope.forceCleanup();
+
+  // In CIR we might have returns in different scopes.
+  // FIXME(cir): cleanup code is handling actual return emission, the logic
+  // should try to match traditional codegen more closely (to the extent
+  // possible).
   auto *retBlock = curLexScope->getOrCreateRetBlock(*this, loc);
-  // This should emit a branch through the cleanup block if one exists.
-  builder.create<cir::BrOp>(loc, retBlock);
-  assert(!cir::MissingFeatures::emitBranchThroughCleanup());
-  if (ehStack.stable_begin() != currentCleanupStackDepth)
-    cgm.errorNYI(s.getSourceRange(), "return with cleanup stack");
+  emitBranchThroughCleanup(loc, returnBlock(retBlock));
 
   // Insert the new block to continue codegen after branch to ret block.
   builder.createBlock(builder.getBlock()->getParent());
@@ -1063,5 +1098,5 @@ void CIRGenFunction::emitReturnOfRValue(mlir::Location loc, RValue rv,
   assert(!cir::MissingFeatures::emitBranchThroughCleanup());
   builder.create<cir::BrOp>(loc, retBlock);
   if (ehStack.stable_begin() != currentCleanupStackDepth)
-    cgm.errorNYI(loc, "return with cleanup stack");
+    cgm.errorNYI(loc, "return of r-value with cleanup stack");
 }
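The new scope handling above matters for returns whose operand is an ExprWithCleanups, that is, a return value that keeps temporaries alive during its evaluation. For example (illustrative only):

#include <string>

std::string make();

// The temporary std::string must be destroyed after size() is read but
// before the branch to the shared return block; the return expression is
// evaluated inside its own cir.scope and cleanupScope.forceCleanup() runs
// the destructor first.
unsigned long f() { return make().size(); }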
+  union {
+    unsigned char SizeSizeInBytes; // sizeof(size_t)
+    unsigned char SizeAlignInBytes;
+  };
 
   cir::TargetAddressSpaceAttr cirAllocaAddressSpace;
 
+  clang::CharUnits getSizeSize() const {
+    return clang::CharUnits::fromQuantity(SizeSizeInBytes);
+  }
   clang::CharUnits getSizeAlign() const {
     return clang::CharUnits::fromQuantity(SizeAlignInBytes);
   }
diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt
index 36db4bd..7c31bea 100644
--- a/clang/lib/CIR/CodeGen/CMakeLists.txt
+++ b/clang/lib/CIR/CodeGen/CMakeLists.txt
@@ -11,13 +11,14 @@ add_clang_library(clangCIR
   CIRGenAsm.cpp
   CIRGenAtomic.cpp
   CIRGenBuilder.cpp
+  CIRGenBuiltin.cpp
+  CIRGenBuiltinX86.cpp
   CIRGenCall.cpp
   CIRGenClass.cpp
   CIRGenCleanup.cpp
   CIRGenCoroutine.cpp
   CIRGenCXX.cpp
   CIRGenCXXABI.cpp
-  CIRGenBuiltin.cpp
   CIRGenDecl.cpp
   CIRGenDeclCXX.cpp
   CIRGenDeclOpenACC.cpp
diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h
index 67a72f5..4198c23 100644
--- a/clang/lib/CIR/CodeGen/EHScopeStack.h
+++ b/clang/lib/CIR/CodeGen/EHScopeStack.h
@@ -18,12 +18,38 @@
 #ifndef CLANG_LIB_CIR_CODEGEN_EHSCOPESTACK_H
 #define CLANG_LIB_CIR_CODEGEN_EHSCOPESTACK_H
 
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "llvm/ADT/SmallVector.h"
 
 namespace clang::CIRGen {
 
 class CIRGenFunction;
 
+/// A branch fixup. These are required when emitting a goto to a
+/// label which hasn't been emitted yet. The goto is optimistically
+/// emitted as a branch to the basic block for the label, and (if it
+/// occurs in a scope with non-trivial cleanups) a fixup is added to
+/// the innermost cleanup. When a (normal) cleanup is popped, any
+/// unresolved fixups in that scope are threaded through the cleanup.
+struct BranchFixup {
+  /// The block containing the terminator which needs to be modified
+  /// into a switch if this fixup is resolved into the current scope.
+  /// If null, the initial branch points directly to the destination.
+  mlir::Block *optimisticBranchBlock = nullptr;
+
+  /// The ultimate destination of the branch.
+  ///
+  /// This can be set to null to indicate that this fixup was
+  /// successfully resolved.
+  mlir::Block *destination = nullptr;
+
+  /// The destination index value.
+  unsigned destinationIndex = 0;
+
+  /// The initial branch of the fixup.
+  cir::BrOp initialBranch = {};
+};
+
 enum CleanupKind : unsigned {
   /// Denotes a cleanup that should run when a scope is exited using exceptional
   /// control flow (a throw statement leading to stack unwinding, ).
@@ -126,9 +152,31 @@ private:
   /// The first valid entry in the buffer.
   char *startOfData = nullptr;
 
+  /// The innermost normal cleanup on the stack.
+  stable_iterator innermostNormalCleanup = stable_end();
+
   /// The CGF this Stack belong to
   CIRGenFunction *cgf = nullptr;
 
+  /// The current set of branch fixups. A branch fixup is a jump to
+  /// an as-yet unemitted label, i.e. a label for which we don't yet
+  /// know the EH stack depth. Whenever we pop a cleanup, we have
+  /// to thread all the current branch fixups through it.
+  ///
+  /// Fixups are recorded as the Use of the respective branch or
+  /// switch statement. The use points to the final destination.
+  /// When popping out of a cleanup, these uses are threaded through
+  /// the cleanup and adjusted to point to the new cleanup.
+  ///
+  /// Note that branches are allowed to jump into protected scopes
+  /// in certain situations; e.g.
the following code is legal: + /// struct A { ~A(); }; // trivial ctor, non-trivial dtor + /// goto foo; + /// A a; + /// foo: + /// bar(); + llvm::SmallVector<BranchFixup> branchFixups; + // This class uses a custom allocator for maximum efficiency because cleanups // are allocated and freed very frequently. It's basically a bump pointer // allocator, but we can't use LLVM's BumpPtrAllocator because we use offsets @@ -155,9 +203,29 @@ public: /// Pops a cleanup scope off the stack. This is private to CIRGenCleanup.cpp. void popCleanup(); + /// Push a set of catch handlers on the stack. The catch is + /// uninitialized and will need to have the given number of handlers + /// set on it. + class EHCatchScope *pushCatch(unsigned numHandlers); + + /// Pops a catch scope off the stack. This is private to CIRGenException.cpp. + void popCatch(); + /// Determines whether the exception-scopes stack is empty. bool empty() const { return startOfData == endOfBuffer; } + /// Determines whether there are any normal cleanups on the stack. + bool hasNormalCleanups() const { + return innermostNormalCleanup != stable_end(); + } + + /// Returns the innermost normal cleanup on the stack, or + /// stable_end() if there are no normal cleanups. + stable_iterator getInnermostNormalCleanup() const { + return innermostNormalCleanup; + } + stable_iterator getInnermostActiveNormalCleanup() const; + /// An unstable reference to a scope-stack depth. Invalidated by /// pushes but not pops. class iterator; @@ -172,12 +240,30 @@ public: return stable_iterator(endOfBuffer - startOfData); } + /// Create a stable reference to the bottom of the EH stack. + static stable_iterator stable_end() { return stable_iterator(0); } + /// Turn a stable reference to a scope depth into a unstable pointer /// to the EH stack. iterator find(stable_iterator savePoint) const; - /// Create a stable reference to the bottom of the EH stack. - static stable_iterator stable_end() { return stable_iterator(0); } + /// Add a branch fixup to the current cleanup scope. + BranchFixup &addBranchFixup() { + assert(hasNormalCleanups() && "adding fixup in scope without cleanups"); + branchFixups.push_back(BranchFixup()); + return branchFixups.back(); + } + + unsigned getNumBranchFixups() const { return branchFixups.size(); } + BranchFixup &getBranchFixup(unsigned i) { + assert(i < getNumBranchFixups()); + return branchFixups[i]; + } + + /// Pops lazily-removed fixups from the end of the list. This + /// should only be called by procedures which have just popped a + /// cleanup or resolved one or more fixups. 
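+  /// (A fixup counts as "null" once its destination has been cleared to
+  /// mark it resolved; see BranchFixup::destination above.)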
+  void popNullFixups();
 };
 
 } // namespace clang::CIRGen
 
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index ed606b7..fa180f5 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -2941,6 +2941,21 @@ mlir::LogicalResult cir::ThrowOp::verify() {
 }
 
 //===----------------------------------------------------------------------===//
+// AtomicFetchOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult cir::AtomicFetchOp::verify() {
+  if (getBinop() != cir::AtomicFetchKind::Add &&
+      getBinop() != cir::AtomicFetchKind::Sub &&
+      getBinop() != cir::AtomicFetchKind::Max &&
+      getBinop() != cir::AtomicFetchKind::Min &&
+      !mlir::isa<cir::IntType>(getVal().getType()))
+    return emitError("only atomic add, sub, max, and min operations can "
+                     "operate on floating-point values");
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
 // TypeInfoAttr
 //===----------------------------------------------------------------------===//
 
diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
index 8589a2e..46bd186 100644
--- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
@@ -551,10 +551,100 @@ public:
   }
 };
 
+class CIRTryOpFlattening : public mlir::OpRewritePattern<cir::TryOp> {
+public:
+  using OpRewritePattern<cir::TryOp>::OpRewritePattern;
+
+  mlir::Block *buildTryBody(cir::TryOp tryOp,
+                            mlir::PatternRewriter &rewriter) const {
+    // Split the current block before the TryOp to create the inlining
+    // point.
+    mlir::Block *beforeTryScopeBlock = rewriter.getInsertionBlock();
+    mlir::Block *afterTry =
+        rewriter.splitBlock(beforeTryScopeBlock, rewriter.getInsertionPoint());
+
+    // Inline body region.
+    mlir::Block *beforeBody = &tryOp.getTryRegion().front();
+    rewriter.inlineRegionBefore(tryOp.getTryRegion(), afterTry);
+
+    // Branch into the body of the region.
+    rewriter.setInsertionPointToEnd(beforeTryScopeBlock);
+    cir::BrOp::create(rewriter, tryOp.getLoc(), mlir::ValueRange(), beforeBody);
+    return afterTry;
+  }
+
+  void buildHandlers(cir::TryOp tryOp, mlir::PatternRewriter &rewriter,
+                     mlir::Block *afterBody, mlir::Block *afterTry,
+                     SmallVectorImpl<cir::CallOp> &callsToRewrite,
+                     SmallVectorImpl<mlir::Block *> &landingPads) const {
+    // Replace the tryOp return with a branch that jumps out of the body.
+    rewriter.setInsertionPointToEnd(afterBody);
+
+    mlir::Block *beforeCatch = rewriter.getInsertionBlock();
+    rewriter.setInsertionPointToEnd(beforeCatch);
+
+    // Check if the terminator is a YieldOp, because there could be another
+    // terminator, e.g. unreachable.
+    if (auto tryBodyYield = dyn_cast<cir::YieldOp>(afterBody->getTerminator()))
+      rewriter.replaceOpWithNewOp<cir::BrOp>(tryBodyYield, afterTry);
+
+    mlir::ArrayAttr handlers = tryOp.getHandlerTypesAttr();
+    if (!handlers || handlers.empty())
+      return;
+
+    llvm_unreachable("TryOpFlattening buildHandlers with CallsOp is NYI");
+  }
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::TryOp tryOp,
+                  mlir::PatternRewriter &rewriter) const override {
+    mlir::OpBuilder::InsertionGuard guard(rewriter);
+    mlir::Block *afterBody = &tryOp.getTryRegion().back();
+
+    // Grab the collection of `cir.call exception`s to rewrite to
+    // `cir.try_call`.
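+    // Calls nested under an inner cir.try belong to that inner op's scope,
+    // so the walk below skips any call whose nearest enclosing TryOp is not
+    // this one.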
+    llvm::SmallVector<cir::CallOp, 4> callsToRewrite;
+    tryOp.getTryRegion().walk([&](CallOp op) {
+      // Only grab calls within the immediately enclosing TryOp scope.
+      if (op->getParentOfType<cir::TryOp>() != tryOp)
+        return;
+      assert(!cir::MissingFeatures::opCallExceptionAttr());
+      callsToRewrite.push_back(op);
+    });
+
+    if (!callsToRewrite.empty())
+      llvm_unreachable(
+          "TryOpFlattening with try block that contains CallOps is NYI");
+
+    // Build try body.
+    mlir::Block *afterTry = buildTryBody(tryOp, rewriter);
+
+    // Build handlers.
+    llvm::SmallVector<mlir::Block *, 4> landingPads;
+    buildHandlers(tryOp, rewriter, afterBody, afterTry, callsToRewrite,
+                  landingPads);
+
+    rewriter.eraseOp(tryOp);
+
+    assert((landingPads.size() == callsToRewrite.size()) &&
+           "expected matching number of entries");
+
+    // Quick block cleanup: no indirection to the post-try block.
+    auto brOp = dyn_cast<cir::BrOp>(afterTry->getTerminator());
+    if (brOp && brOp.getDest()->hasNoPredecessors()) {
+      mlir::Block *srcBlock = brOp.getDest();
+      rewriter.eraseOp(brOp);
+      rewriter.mergeBlocks(srcBlock, afterTry);
+    }
+
+    return mlir::success();
+  }
+};
+
 void populateFlattenCFGPatterns(RewritePatternSet &patterns) {
   patterns
       .add<CIRIfFlattening, CIRLoopOpInterfaceFlattening, CIRScopeOpFlattening,
-           CIRSwitchOpFlattening, CIRTernaryOpFlattening>(
+           CIRSwitchOpFlattening, CIRTernaryOpFlattening, CIRTryOpFlattening>(
           patterns.getContext());
 }
 
@@ -568,7 +658,7 @@ void CIRFlattenCFGPass::runOnOperation() {
         assert(!cir::MissingFeatures::ifOp());
         assert(!cir::MissingFeatures::switchOp());
         assert(!cir::MissingFeatures::tryOp());
-        if (isa<IfOp, ScopeOp, SwitchOp, LoopOpInterface, TernaryOp>(op))
+        if (isa<IfOp, ScopeOp, SwitchOp, LoopOpInterface, TernaryOp, TryOp>(op))
          ops.push_back(op);
       });
 
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 3fc94eb..bb75f2d 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -770,6 +770,147 @@ mlir::LogicalResult CIRToLLVMAtomicClearOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+static mlir::LLVM::AtomicBinOp
+getLLVMAtomicBinOp(cir::AtomicFetchKind k, bool isInt, bool isSignedInt) {
+  switch (k) {
+  case cir::AtomicFetchKind::Add:
+    return isInt ? mlir::LLVM::AtomicBinOp::add : mlir::LLVM::AtomicBinOp::fadd;
+  case cir::AtomicFetchKind::Sub:
+    return isInt ? mlir::LLVM::AtomicBinOp::sub : mlir::LLVM::AtomicBinOp::fsub;
+  case cir::AtomicFetchKind::And:
+    return mlir::LLVM::AtomicBinOp::_and;
+  case cir::AtomicFetchKind::Xor:
+    return mlir::LLVM::AtomicBinOp::_xor;
+  case cir::AtomicFetchKind::Or:
+    return mlir::LLVM::AtomicBinOp::_or;
+  case cir::AtomicFetchKind::Nand:
+    return mlir::LLVM::AtomicBinOp::nand;
+  case cir::AtomicFetchKind::Max: {
+    if (!isInt)
+      return mlir::LLVM::AtomicBinOp::fmax;
+    return isSignedInt ? mlir::LLVM::AtomicBinOp::max
+                       : mlir::LLVM::AtomicBinOp::umax;
+  }
+  case cir::AtomicFetchKind::Min: {
+    if (!isInt)
+      return mlir::LLVM::AtomicBinOp::fmin;
+    return isSignedInt ? mlir::LLVM::AtomicBinOp::min
+                       : mlir::LLVM::AtomicBinOp::umin;
+  }
+  }
+  llvm_unreachable("Unknown atomic fetch opcode");
+}
+
+static llvm::StringLiteral getLLVMBinop(cir::AtomicFetchKind k, bool isInt) {
+  switch (k) {
+  case cir::AtomicFetchKind::Add:
+    return isInt ? mlir::LLVM::AddOp::getOperationName()
+                 : mlir::LLVM::FAddOp::getOperationName();
+  case cir::AtomicFetchKind::Sub:
+    return isInt ? mlir::LLVM::SubOp::getOperationName()
+                 : mlir::LLVM::FSubOp::getOperationName();
+  case cir::AtomicFetchKind::And:
+    return mlir::LLVM::AndOp::getOperationName();
+  case cir::AtomicFetchKind::Xor:
+    return mlir::LLVM::XOrOp::getOperationName();
+  case cir::AtomicFetchKind::Or:
+    return mlir::LLVM::OrOp::getOperationName();
+  case cir::AtomicFetchKind::Nand:
+    // There's no nand binop in LLVM; it is fixed up later with a not.
+    return mlir::LLVM::AndOp::getOperationName();
+  case cir::AtomicFetchKind::Max:
+  case cir::AtomicFetchKind::Min:
+    llvm_unreachable("handled in buildMinMaxPostOp");
+  }
+  llvm_unreachable("Unknown atomic fetch opcode");
+}
+
+mlir::Value CIRToLLVMAtomicFetchOpLowering::buildPostOp(
+    cir::AtomicFetchOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter, mlir::Value rmwVal,
+    bool isInt) const {
+  SmallVector<mlir::Value> atomicOperands = {rmwVal, adaptor.getVal()};
+  SmallVector<mlir::Type> atomicResTys = {rmwVal.getType()};
+  return rewriter
+      .create(op.getLoc(),
+              rewriter.getStringAttr(getLLVMBinop(op.getBinop(), isInt)),
+              atomicOperands, atomicResTys, {})
+      ->getResult(0);
+}
+
+mlir::Value CIRToLLVMAtomicFetchOpLowering::buildMinMaxPostOp(
+    cir::AtomicFetchOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter, mlir::Value rmwVal, bool isInt,
+    bool isSigned) const {
+  mlir::Location loc = op.getLoc();
+
+  if (!isInt) {
+    if (op.getBinop() == cir::AtomicFetchKind::Max)
+      return mlir::LLVM::MaxNumOp::create(rewriter, loc, rmwVal,
+                                          adaptor.getVal());
+    return mlir::LLVM::MinNumOp::create(rewriter, loc, rmwVal,
+                                        adaptor.getVal());
+  }
+
+  mlir::LLVM::ICmpPredicate pred;
+  if (op.getBinop() == cir::AtomicFetchKind::Max) {
+    pred = isSigned ? mlir::LLVM::ICmpPredicate::sgt
+                    : mlir::LLVM::ICmpPredicate::ugt;
+  } else { // Min
+    pred = isSigned ? mlir::LLVM::ICmpPredicate::slt
+                    : mlir::LLVM::ICmpPredicate::ult;
+  }
+  mlir::Value cmp = mlir::LLVM::ICmpOp::create(
+      rewriter, loc,
+      mlir::LLVM::ICmpPredicateAttr::get(rewriter.getContext(), pred), rmwVal,
+      adaptor.getVal());
+  return mlir::LLVM::SelectOp::create(rewriter, loc, cmp, rmwVal,
+                                      adaptor.getVal());
+}
+
+mlir::LogicalResult CIRToLLVMAtomicFetchOpLowering::matchAndRewrite(
+    cir::AtomicFetchOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  bool isInt = false;
+  bool isSignedInt = false;
+  if (auto intTy = mlir::dyn_cast<cir::IntType>(op.getVal().getType())) {
+    isInt = true;
+    isSignedInt = intTy.isSigned();
+  } else if (mlir::isa<cir::SingleType, cir::DoubleType>(
+                 op.getVal().getType())) {
+    isInt = false;
+  } else {
+    return op.emitError() << "Unsupported type: " << op.getVal().getType();
+  }
+
+  mlir::LLVM::AtomicOrdering llvmOrder = getLLVMMemOrder(op.getMemOrder());
+  mlir::LLVM::AtomicBinOp llvmBinOp =
+      getLLVMAtomicBinOp(op.getBinop(), isInt, isSignedInt);
+  auto rmwVal = mlir::LLVM::AtomicRMWOp::create(rewriter, op.getLoc(),
+                                                llvmBinOp, adaptor.getPtr(),
+                                                adaptor.getVal(), llvmOrder);
+
+  mlir::Value result = rmwVal.getResult();
+  if (!op.getFetchFirst()) {
+    if (op.getBinop() == cir::AtomicFetchKind::Max ||
+        op.getBinop() == cir::AtomicFetchKind::Min)
+      result = buildMinMaxPostOp(op, adaptor, rewriter, rmwVal.getRes(), isInt,
+                                 isSignedInt);
+    else
+      result = buildPostOp(op, adaptor, rewriter, rmwVal.getRes(), isInt);
+
+    // Compensate for the lack of a nand binop in LLVM IR.
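+    // For Nand, buildPostOp emitted (old & val); flipping the bits with
+    // xor -1 below yields the required ~(old & val). For example, with the
+    // hypothetical values old = 0b1100 and val = 0b1010, the atomicrmw
+    // stored ~0b1000, and (0b1100 & 0b1010) ^ -1 recomputes that result.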
+ if (op.getBinop() == cir::AtomicFetchKind::Nand) { + auto negOne = mlir::LLVM::ConstantOp::create(rewriter, op.getLoc(), + result.getType(), -1); + result = mlir::LLVM::XOrOp::create(rewriter, op.getLoc(), result, negOne); + } + } + + rewriter.replaceOp(op, result); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMBitClrsbOpLowering::matchAndRewrite( cir::BitClrsbOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 954ecab..61beb04 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -290,6 +290,8 @@ void arm::setArchNameInTriple(const Driver &D, const ArgList &Args, // Thumb2 is the default for V7 on Darwin. (llvm::ARM::parseArchVersion(Suffix) == 7 && Triple.isOSBinFormatMachO()) || + // Thumb2 is the default for Fuchsia. + Triple.isOSFuchsia() || // FIXME: this is invalid for WindowsCE Triple.isOSWindows(); @@ -452,6 +454,9 @@ arm::FloatABI arm::getDefaultFloatABI(const llvm::Triple &Triple) { case llvm::Triple::OpenBSD: return FloatABI::SoftFP; + case llvm::Triple::Fuchsia: + return FloatABI::Hard; + default: if (Triple.isOHOSFamily()) return FloatABI::Soft; diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 37c10c6..e5abf83 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -798,9 +798,11 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, } if (!DryRun) { + const bool ContinuePPDirective = + State.Line->InMacroBody && Current.isNot(TT_LineComment); Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces, State.Column + Spaces + PPColumnCorrection, - /*IsAligned=*/false, State.Line->InMacroBody); + /*IsAligned=*/false, ContinuePPDirective); } // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance @@ -1176,10 +1178,11 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // about removing empty lines on closing blocks. Special case them here. 
MaxEmptyLinesToKeep = 1; } - unsigned Newlines = + const unsigned Newlines = std::max(1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep)); - bool ContinuePPDirective = - State.Line->InPPDirective && State.Line->Type != LT_ImportStatement; + const bool ContinuePPDirective = State.Line->InPPDirective && + State.Line->Type != LT_ImportStatement && + Current.isNot(TT_LineComment); Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column, CurrentState.IsAligned, ContinuePPDirective); } diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 39fa25f..215ac18 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2214,9 +2214,9 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { else PD << "expression"; - if (Diag(Loc, PD, FD) - << false /*show bit size*/ << 0 << Ty << false /*return*/ - << TI.getTriple().str()) { + if (Diag(Loc, PD) << false /*show bit size*/ << 0 << Ty + << false /*return*/ + << TI.getTriple().str()) { if (D) D->setInvalidDecl(); } @@ -2233,9 +2233,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { else PD << "expression"; - if (Diag(Loc, PD, FD) - << false /*show bit size*/ << 0 << Ty << true /*return*/ - << TI.getTriple().str()) { + if (Diag(Loc, PD) << false /*show bit size*/ << 0 << Ty << true /*return*/ + << TI.getTriple().str()) { if (D) D->setInvalidDecl(); } diff --git a/clang/lib/Sema/SemaBase.cpp b/clang/lib/Sema/SemaBase.cpp index 9b677f4..bf32491 100644 --- a/clang/lib/Sema/SemaBase.cpp +++ b/clang/lib/Sema/SemaBase.cpp @@ -58,13 +58,13 @@ SemaBase::SemaDiagnosticBuilder::getDeviceDeferredDiags() const { return S.DeviceDeferredDiags; } -Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, unsigned DiagID, - bool DeferHint) { +Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, + unsigned DiagID) { bool IsError = getDiagnostics().getDiagnosticIDs()->isDefaultMappingAsError(DiagID); bool ShouldDefer = getLangOpts().CUDA && getLangOpts().GPUDeferDiag && DiagnosticIDs::isDeferrable(DiagID) && - (DeferHint || SemaRef.DeferDiags || !IsError); + (SemaRef.DeferDiags || !IsError); auto SetIsLastErrorImmediate = [&](bool Flag) { if (IsError) SemaRef.IsLastErrorImmediate = Flag; @@ -83,16 +83,13 @@ Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, unsigned DiagID, } Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, - const PartialDiagnostic &PD, - bool DeferHint) { - return Diag(Loc, PD.getDiagID(), DeferHint) << PD; + const PartialDiagnostic &PD) { + return Diag(Loc, PD.getDiagID()) << PD; } SemaBase::SemaDiagnosticBuilder SemaBase::DiagCompat(SourceLocation Loc, - unsigned CompatDiagId, - bool DeferHint) { + unsigned CompatDiagId) { return Diag(Loc, - DiagnosticIDs::getCXXCompatDiagId(getLangOpts(), CompatDiagId), - DeferHint); + DiagnosticIDs::getCXXCompatDiagId(getLangOpts(), CompatDiagId)); } } // namespace clang diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 7da09e8..1f25111 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -13208,7 +13208,10 @@ void OverloadCandidateSet::NoteCandidates( auto Cands = CompleteCandidates(S, OCD, Args, OpLoc, Filter); - S.Diag(PD.first, PD.second, shouldDeferDiags(S, Args, OpLoc)); + { + Sema::DeferDiagsRAII RAII{S, shouldDeferDiags(S, Args, OpLoc)}; + S.Diag(PD.first, PD.second); + } // In WebAssembly we don't want to emit further diagnostics if a table is // passed as an argument to a function. 
@@ -13271,10 +13274,10 @@ void OverloadCandidateSet::NoteCandidates(Sema &S, ArrayRef<Expr *> Args, // inform the future value of S.Diags.getNumOverloadCandidatesToShow(). S.Diags.overloadCandidatesShown(CandsShown); - if (I != E) - S.Diag(OpLoc, diag::note_ovl_too_many_candidates, - shouldDeferDiags(S, Args, OpLoc)) - << int(E - I); + if (I != E) { + Sema::DeferDiagsRAII RAII{S, shouldDeferDiags(S, Args, OpLoc)}; + S.Diag(OpLoc, diag::note_ovl_too_many_candidates) << int(E - I); + } } static SourceLocation diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp index c5ef0d5..b5f91a3 100644 --- a/clang/lib/Sema/SemaRISCV.cpp +++ b/clang/lib/Sema/SemaRISCV.cpp @@ -1445,21 +1445,21 @@ void SemaRISCV::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D, if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Double) && !FeatureMap.lookup("zve64d")) - Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64d"; + Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zve64d"; // (ELEN, LMUL) pairs of (8, mf8), (16, mf4), (32, mf2), (64, m1) requires at // least zve64x else if (((EltSize == 64 && Info.ElementType->isIntegerType()) || MinElts == 1) && !FeatureMap.lookup("zve64x")) - Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64x"; + Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zve64x"; else if (Info.ElementType->isFloat16Type() && !FeatureMap.lookup("zvfh") && !FeatureMap.lookup("zvfhmin") && !FeatureMap.lookup("xandesvpackfph")) if (DeclareAndesVectorBuiltins) { - Diag(Loc, diag::err_riscv_type_requires_extension, D) + Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zvfh, zvfhmin or xandesvpackfph"; } else { - Diag(Loc, diag::err_riscv_type_requires_extension, D) + Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zvfh or zvfhmin"; } else if (Info.ElementType->isBFloat16Type() && @@ -1467,18 +1467,18 @@ void SemaRISCV::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D, !FeatureMap.lookup("xandesvbfhcvt") && !FeatureMap.lookup("experimental-zvfbfa")) if (DeclareAndesVectorBuiltins) { - Diag(Loc, diag::err_riscv_type_requires_extension, D) + Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zvfbfmin or xandesvbfhcvt"; } else { - Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zvfbfmin"; + Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zvfbfmin"; } else if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Float) && !FeatureMap.lookup("zve32f")) - Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32f"; + Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zve32f"; // Given that caller already checked isRVVType() before calling this function, // if we don't have at least zve32x supported, then we need to emit error. else if (!FeatureMap.lookup("zve32x")) - Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32x"; + Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zve32x"; } /// Are the two types RVV-bitcast-compatible types? I.e. 
is bitcasting from the diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c index cf20226..6579988 100644 --- a/clang/test/CIR/CodeGen/atomic.c +++ b/clang/test/CIR/CodeGen/atomic.c @@ -584,3 +584,526 @@ void clear_volatile(volatile void *p) { // OGCG: store atomic volatile i8 0, ptr %{{.+}} seq_cst, align 1 } + +int atomic_fetch_add(int *ptr, int value) { + // CIR-LABEL: @atomic_fetch_add + // LLVM-LABEL: @atomic_fetch_add + // OGCG-LABEL: @atomic_fetch_add + + return __atomic_fetch_add(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch add seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw add ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw add ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_add_fetch(int *ptr, int value) { + // CIR-LABEL: @atomic_add_fetch + // LLVM-LABEL: @atomic_add_fetch + // OGCG-LABEL: @atomic_add_fetch + + return __atomic_add_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch add seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[OLD:.+]] = atomicrmw add ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[RES:.+]] = add i32 %[[OLD]], %[[VAL]] + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw add ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[RES:.+]] = add i32 %[[OLD]], %[[VAL]] + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int c11_atomic_fetch_add(_Atomic(int) *ptr, int value) { + // CIR-LABEL: @c11_atomic_fetch_add + // LLVM-LABEL: @c11_atomic_fetch_add + // OGCG-LABEL: @c11_atomic_fetch_add + + return __c11_atomic_fetch_add(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch add seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw add ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw add ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_fetch_sub(int *ptr, int value) { + // CIR-LABEL: @atomic_fetch_sub + // LLVM-LABEL: @atomic_fetch_sub + // OGCG-LABEL: @atomic_fetch_sub + + return __atomic_fetch_sub(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch sub seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw sub ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw sub ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_sub_fetch(int *ptr, int value) { + // CIR-LABEL: @atomic_sub_fetch + // LLVM-LABEL: @atomic_sub_fetch + // OGCG-LABEL: @atomic_sub_fetch + + return __atomic_sub_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch sub seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[OLD:.+]] = atomicrmw sub ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[RES:.+]] = sub i32 %[[OLD]], %[[VAL]] + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw sub ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[RES:.+]] = sub i32 
%[[OLD]], %[[VAL]] + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int c11_atomic_fetch_sub(_Atomic(int) *ptr, int value) { + // CIR-LABEL: @c11_atomic_fetch_sub + // LLVM-LABEL: @c11_atomic_fetch_sub + // OGCG-LABEL: @c11_atomic_fetch_sub + + return __c11_atomic_fetch_sub(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch sub seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw sub ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw sub ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +float atomic_fetch_add_fp(float *ptr, float value) { + // CIR-LABEL: @atomic_fetch_add_fp + // LLVM-LABEL: @atomic_fetch_add_fp + // OGCG-LABEL: @atomic_fetch_add_fp + + return __atomic_fetch_add(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch add seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + + // LLVM: %[[RES:.+]] = atomicrmw fadd ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw fadd ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 +} + +float atomic_add_fetch_fp(float *ptr, float value) { + // CIR-LABEL: @atomic_add_fetch_fp + // LLVM-LABEL: @atomic_add_fetch_fp + // OGCG-LABEL: @atomic_add_fetch_fp + + return __atomic_add_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch add seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + + // LLVM: %[[OLD:.+]] = atomicrmw fadd ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[RES:.+]] = fadd float %[[OLD]], %[[VAL]] + // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw fadd ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[RES:.+]] = fadd float %[[OLD]], %[[VAL]] + // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 +} + +float c11_atomic_fetch_sub_fp(_Atomic(float) *ptr, float value) { + // CIR-LABEL: @c11_atomic_fetch_sub_fp + // LLVM-LABEL: @c11_atomic_fetch_sub_fp + // OGCG-LABEL: @c11_atomic_fetch_sub_fp + + return __c11_atomic_fetch_sub(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch sub seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + + // LLVM: %[[RES:.+]] = atomicrmw fsub ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw fsub ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_fetch_min(int *ptr, int value) { + // CIR-LABEL: @atomic_fetch_min + // LLVM-LABEL: @atomic_fetch_min + // OGCG-LABEL: @atomic_fetch_min + + return __atomic_fetch_min(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch min seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw min ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw min ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_min_fetch(int *ptr, int value) { + // CIR-LABEL: @atomic_min_fetch + // LLVM-LABEL: @atomic_min_fetch + // 
OGCG-LABEL: @atomic_min_fetch + + return __atomic_min_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch min seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[OLD:.+]] = atomicrmw min ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[OLD_LESS:.+]] = icmp slt i32 %[[OLD]], %[[VAL]] + // LLVM-NEXT: %[[RES:.+]] = select i1 %[[OLD_LESS]], i32 %[[OLD]], i32 %[[VAL]] + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw min ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[OLD_LESS:.+]] = icmp slt i32 %[[OLD]], %[[VAL]] + // OGCG-NEXT: %[[RES:.+]] = select i1 %[[OLD_LESS]], i32 %[[OLD]], i32 %[[VAL]] + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int c11_atomic_fetch_min(_Atomic(int) *ptr, int value) { + // CIR-LABEL: @c11_atomic_fetch_min + // LLVM-LABEL: @c11_atomic_fetch_min + // OGCG-LABEL: @c11_atomic_fetch_min + + return __c11_atomic_fetch_min(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch min seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw min ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw min ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +float atomic_fetch_min_fp(float *ptr, float value) { + // CIR-LABEL: @atomic_fetch_min_fp + // LLVM-LABEL: @atomic_fetch_min_fp + // OGCG-LABEL: @atomic_fetch_min_fp + + return __atomic_fetch_min(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch min seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + + // LLVM: %[[RES:.+]] = atomicrmw fmin ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw fmin ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 +} + +float atomic_min_fetch_fp(float *ptr, float value) { + // CIR-LABEL: @atomic_min_fetch_fp + // LLVM-LABEL: @atomic_min_fetch_fp + // OGCG-LABEL: @atomic_min_fetch_fp + + return __atomic_min_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch min seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + + // LLVM: %[[OLD:.+]] = atomicrmw fmin ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[RES:.+]] = call float @llvm.minnum.f32(float %[[OLD]], float %[[VAL]]) + // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw fmin ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[RES:.+]] = call float @llvm.minnum.f32(float %[[OLD]], float %[[VAL]]) + // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 +} + +float c11_atomic_fetch_min_fp(_Atomic(float) *ptr, float value) { + // CIR-LABEL: @c11_atomic_fetch_min_fp + // LLVM-LABEL: @c11_atomic_fetch_min_fp + // OGCG-LABEL: @c11_atomic_fetch_min_fp + + return __c11_atomic_fetch_min(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch min seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + + // LLVM: %[[RES:.+]] = atomicrmw fmin ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw fmin ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store float 
%[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_fetch_max(int *ptr, int value) { + // CIR-LABEL: @atomic_fetch_max + // LLVM-LABEL: @atomic_fetch_max + // OGCG-LABEL: @atomic_fetch_max + + return __atomic_fetch_max(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch max seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw max ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw max ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_max_fetch(int *ptr, int value) { + // CIR-LABEL: @atomic_max_fetch + // LLVM-LABEL: @atomic_max_fetch + // OGCG-LABEL: @atomic_max_fetch + + return __atomic_max_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch max seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[OLD:.+]] = atomicrmw max ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[OLD_GREATER:.+]] = icmp sgt i32 %[[OLD]], %[[VAL]] + // LLVM-NEXT: %[[RES:.+]] = select i1 %[[OLD_GREATER]], i32 %[[OLD]], i32 %[[VAL]] + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw max ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[OLD_GREATER:.+]] = icmp sgt i32 %[[OLD]], %[[VAL]] + // OGCG-NEXT: %[[RES:.+]] = select i1 %[[OLD_GREATER]], i32 %[[OLD]], i32 %[[VAL]] + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int c11_atomic_fetch_max(_Atomic(int) *ptr, int value) { + // CIR-LABEL: @c11_atomic_fetch_max + // LLVM-LABEL: @c11_atomic_fetch_max + // OGCG-LABEL: @c11_atomic_fetch_max + + return __c11_atomic_fetch_max(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch max seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw max ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw max ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +float atomic_fetch_max_fp(float *ptr, float value) { + // CIR-LABEL: @atomic_fetch_max_fp + // LLVM-LABEL: @atomic_fetch_max_fp + // OGCG-LABEL: @atomic_fetch_max_fp + + return __atomic_fetch_max(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch max seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + + // LLVM: %[[RES:.+]] = atomicrmw fmax ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw fmax ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 +} + +float atomic_max_fetch_fp(float *ptr, float value) { + // CIR-LABEL: @atomic_max_fetch_fp + // LLVM-LABEL: @atomic_max_fetch_fp + // OGCG-LABEL: @atomic_max_fetch_fp + + return __atomic_max_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch max seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + + // LLVM: %[[OLD:.+]] = atomicrmw fmax ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[RES:.+]] = call float @llvm.maxnum.f32(float %[[OLD]], float %[[VAL]]) + // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw fmax ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[RES:.+]] = 
call float @llvm.maxnum.f32(float %[[OLD]], float %[[VAL]]) + // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 +} + +float c11_atomic_fetch_max_fp(_Atomic(float) *ptr, float value) { + // CIR-LABEL: @c11_atomic_fetch_max_fp + // LLVM-LABEL: @c11_atomic_fetch_max_fp + // OGCG-LABEL: @c11_atomic_fetch_max_fp + + return __c11_atomic_fetch_max(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch max seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + + // LLVM: %[[RES:.+]] = atomicrmw fmax ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw fmax ptr %{{.+}}, float %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_fetch_and(int *ptr, int value) { + // CIR-LABEL: @atomic_fetch_and + // LLVM-LABEL: @atomic_fetch_and + // OGCG-LABEL: @atomic_fetch_and + + return __atomic_fetch_and(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch and seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw and ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw and ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_and_fetch(int *ptr, int value) { + // CIR-LABEL: @atomic_and_fetch + // LLVM-LABEL: @atomic_and_fetch + // OGCG-LABEL: @atomic_and_fetch + + return __atomic_and_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch and seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[OLD:.+]] = atomicrmw and ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[RES:.+]] = and i32 %[[OLD]], %[[VAL]] + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw and ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[RES:.+]] = and i32 %[[OLD]], %[[VAL]] + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int c11_atomic_fetch_and(_Atomic(int) *ptr, int value) { + // CIR-LABEL: @c11_atomic_fetch_and + // LLVM-LABEL: @c11_atomic_fetch_and + // OGCG-LABEL: @c11_atomic_fetch_and + + return __c11_atomic_fetch_and(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch and seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw and ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw and ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_fetch_or(int *ptr, int value) { + // CIR-LABEL: @atomic_fetch_or + // LLVM-LABEL: @atomic_fetch_or + // OGCG-LABEL: @atomic_fetch_or + + return __atomic_fetch_or(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch or seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw or ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw or ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_or_fetch(int *ptr, int value) { + // CIR-LABEL: @atomic_or_fetch + // LLVM-LABEL: @atomic_or_fetch + // OGCG-LABEL: @atomic_or_fetch + + return __atomic_or_fetch(ptr, 
value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch or seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[OLD:.+]] = atomicrmw or ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[RES:.+]] = or i32 %[[OLD]], %[[VAL]] + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw or ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[RES:.+]] = or i32 %[[OLD]], %[[VAL]] + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int c11_atomic_fetch_or(_Atomic(int) *ptr, int value) { + // CIR-LABEL: @c11_atomic_fetch_or + // LLVM-LABEL: @c11_atomic_fetch_or + // OGCG-LABEL: @c11_atomic_fetch_or + + return __c11_atomic_fetch_or(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch or seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw or ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw or ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_fetch_xor(int *ptr, int value) { + // CIR-LABEL: @atomic_fetch_xor + // LLVM-LABEL: @atomic_fetch_xor + // OGCG-LABEL: @atomic_fetch_xor + + return __atomic_fetch_xor(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch xor seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw xor ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw xor ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_xor_fetch(int *ptr, int value) { + // CIR-LABEL: @atomic_xor_fetch + // LLVM-LABEL: @atomic_xor_fetch + // OGCG-LABEL: @atomic_xor_fetch + + return __atomic_xor_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch xor seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[OLD:.+]] = atomicrmw xor ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[RES:.+]] = xor i32 %[[OLD]], %[[VAL]] + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw xor ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[RES:.+]] = xor i32 %[[OLD]], %[[VAL]] + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int c11_atomic_fetch_xor(_Atomic(int) *ptr, int value) { + // CIR-LABEL: @c11_atomic_fetch_xor + // LLVM-LABEL: @c11_atomic_fetch_xor + // OGCG-LABEL: @c11_atomic_fetch_xor + + return __c11_atomic_fetch_xor(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch xor seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw xor ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw xor ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_fetch_nand(int *ptr, int value) { + // CIR-LABEL: @atomic_fetch_nand + // LLVM-LABEL: @atomic_fetch_nand + // OGCG-LABEL: @atomic_fetch_nand + + return __atomic_fetch_nand(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch nand seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw nand ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // 
LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw nand ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int atomic_nand_fetch(int *ptr, int value) { + // CIR-LABEL: @atomic_nand_fetch + // LLVM-LABEL: @atomic_nand_fetch + // OGCG-LABEL: @atomic_nand_fetch + + return __atomic_nand_fetch(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch nand seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[OLD:.+]] = atomicrmw nand ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // LLVM-NEXT: %[[TMP:.+]] = and i32 %[[OLD]], %[[VAL]] + // LLVM-NEXT: %[[RES:.+]] = xor i32 %[[TMP]], -1 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[OLD:.+]] = atomicrmw nand ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4 + // OGCG-NEXT: %[[TMP:.+]] = and i32 %[[OLD]], %[[VAL]] + // OGCG-NEXT: %[[RES:.+]] = xor i32 %[[TMP]], -1 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} + +int c11_atomic_fetch_nand(_Atomic(int) *ptr, int value) { + // CIR-LABEL: @c11_atomic_fetch_nand + // LLVM-LABEL: @c11_atomic_fetch_nand + // OGCG-LABEL: @c11_atomic_fetch_nand + + return __c11_atomic_fetch_nand(ptr, value, __ATOMIC_SEQ_CST); + // CIR: %{{.+}} = cir.atomic.fetch nand seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + + // LLVM: %[[RES:.+]] = atomicrmw nand ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 + + // OGCG: %[[RES:.+]] = atomicrmw nand ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4 + // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4 +} diff --git a/clang/test/CIR/CodeGen/builtin_inline.c b/clang/test/CIR/CodeGen/builtin_inline.c new file mode 100644 index 0000000..83a3ba6 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin_inline.c @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -disable-llvm-passes %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -disable-llvm-passes %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +typedef unsigned long size_t; + +// Normal inline builtin declaration +// When a builtin is redefined with extern inline + always_inline attributes, +// the compiler creates a .inline version to avoid conflicts with the builtin + +extern inline __attribute__((always_inline)) __attribute__((gnu_inline)) +void *memcpy(void *a, const void *b, size_t c) { + return __builtin_memcpy(a, b, c); +} + +void *test_inline_builtin_memcpy(void *a, const void *b, size_t c) { + return memcpy(a, b, c); +} + +// CIR: cir.func internal private{{.*}}@memcpy.inline({{.*}}) -> !cir.ptr<!void> inline(always) + +// CIR-LABEL: @test_inline_builtin_memcpy( +// CIR: cir.call @memcpy.inline( +// CIR: } + +// LLVM: define internal ptr @memcpy.inline(ptr{{.*}}, ptr{{.*}}, i64{{.*}}) #{{[0-9]+}} + +// LLVM-LABEL: @test_inline_builtin_memcpy( +// LLVM: call ptr @memcpy.inline( + +// OGCG-LABEL: @test_inline_builtin_memcpy( +// OGCG: call ptr @memcpy.inline( + +// OGCG: define internal ptr @memcpy.inline(ptr{{.*}} %a, ptr{{.*}} %b, i64{{.*}} %c) #{{[0-9]+}} + +// Shadowing case +// When a non-inline function definition shadows an inline builtin declaration, +// the .inline version should be replaced 
with the regular function and removed. + +extern inline __attribute__((always_inline)) __attribute__((gnu_inline)) +void *memmove(void *a, const void *b, size_t c) { + return __builtin_memmove(a, b, c); +} + +void *memmove(void *a, const void *b, size_t c) { + char *dst = (char *)a; + const char *src = (const char *)b; + if (dst < src) { + for (size_t i = 0; i < c; i++) { + dst[i] = src[i]; + } + } else { + for (size_t i = c; i > 0; i--) { + dst[i-1] = src[i-1]; + } + } + return a; +} + +void *test_shadowed_memmove(void *a, const void *b, size_t c) { + return memmove(a, b, c); +} + +// CIR: cir.func{{.*}}@memmove({{.*}}) -> !cir.ptr<!void>{{.*}}{ +// CIR-NOT: @memmove.inline + +// CIR-LABEL: @test_shadowed_memmove( +// CIR: cir.call @memmove( +// CIR-NOT: @memmove.inline +// CIR: } + +// LLVM: define dso_local ptr @memmove(ptr{{.*}}, ptr{{.*}}, i64{{.*}}) #{{[0-9]+}} +// LLVM-NOT: @memmove.inline + +// LLVM-LABEL: @test_shadowed_memmove( +// TODO - this deviation from OGCG is expected until we implement the nobuiltin +// attribute. See CIRGenFunction::emitDirectCallee +// LLVM: call ptr @memmove( +// LLVM-NOT: @memmove.inline +// LLVM: } + +// OGCG: define dso_local ptr @memmove(ptr{{.*}} %a, ptr{{.*}} %b, i64{{.*}} %c) #{{[0-9]+}} +// OGCG-NOT: @memmove.inline + +// OGCG-LABEL: @test_shadowed_memmove( +// OGCG: call void @llvm.memmove.p0.p0.i64( +// OGCG-NOT: @memmove.inline +// OGCG: } diff --git a/clang/test/CIR/CodeGen/dtors.cpp b/clang/test/CIR/CodeGen/dtors.cpp index f2c80a5..1fe048b7 100644 --- a/clang/test/CIR/CodeGen/dtors.cpp +++ b/clang/test/CIR/CodeGen/dtors.cpp @@ -35,7 +35,7 @@ bool make_temp(const B &) { return false; } bool test_temp_or() { return make_temp(1) || make_temp(2); } // CIR: cir.func{{.*}} @_Z12test_temp_orv() -// CIR: %[[SCOPE:.*]] = cir.scope { +// CIR: cir.scope { // CIR: %[[REF_TMP0:.*]] = cir.alloca !rec_B, !cir.ptr<!rec_B>, ["ref.tmp0"] // CIR: %[[ONE:.*]] = cir.const #cir.int<1> // CIR: cir.call @_ZN1BC2Ei(%[[REF_TMP0]], %[[ONE]]) @@ -51,9 +51,9 @@ bool test_temp_or() { return make_temp(1) || make_temp(2); } // CIR: cir.call @_ZN1BD2Ev(%[[REF_TMP1]]) // CIR: cir.yield %[[MAKE_TEMP1]] : !cir.bool // CIR: }) +// CIR: cir.store{{.*}} %[[TERNARY]], %[[RETVAL:.*]] // CIR: cir.call @_ZN1BD2Ev(%[[REF_TMP0]]) -// CIR: cir.yield %[[TERNARY]] : !cir.bool -// CIR: } : !cir.bool +// CIR: } // LLVM: define{{.*}} i1 @_Z12test_temp_orv(){{.*}} { // LLVM: %[[REF_TMP0:.*]] = alloca %struct.B @@ -105,7 +105,7 @@ bool test_temp_or() { return make_temp(1) || make_temp(2); } bool test_temp_and() { return make_temp(1) && make_temp(2); } // CIR: cir.func{{.*}} @_Z13test_temp_andv() -// CIR: %[[SCOPE:.*]] = cir.scope { +// CIR: cir.scope { // CIR: %[[REF_TMP0:.*]] = cir.alloca !rec_B, !cir.ptr<!rec_B>, ["ref.tmp0"] // CIR: %[[ONE:.*]] = cir.const #cir.int<1> // CIR: cir.call @_ZN1BC2Ei(%[[REF_TMP0]], %[[ONE]]) @@ -121,9 +121,9 @@ bool test_temp_and() { return make_temp(1) && make_temp(2); } // CIR: %[[FALSE:.*]] = cir.const #false // CIR: cir.yield %[[FALSE]] : !cir.bool // CIR: }) +// CIR: cir.store{{.*}} %[[TERNARY]], %[[RETVAL:.*]] // CIR: cir.call @_ZN1BD2Ev(%[[REF_TMP0]]) -// CIR: cir.yield %[[TERNARY]] : !cir.bool -// CIR: } : !cir.bool +// CIR: } // LLVM: define{{.*}} i1 @_Z13test_temp_andv(){{.*}} { // LLVM: %[[REF_TMP0:.*]] = alloca %struct.B diff --git a/clang/test/CIR/CodeGen/lambda.cpp b/clang/test/CIR/CodeGen/lambda.cpp index 0c32ceb1..91380b9 100644 --- a/clang/test/CIR/CodeGen/lambda.cpp +++ b/clang/test/CIR/CodeGen/lambda.cpp @@ -219,14 +219,13 @@ int f() { // 
CIR: cir.func dso_local @_Z1fv() -> !s32i {{.*}} { // CIR: %[[RETVAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] -// CIR: %[[SCOPE_RET:.*]] = cir.scope { +// CIR: cir.scope { // CIR: %[[TMP:.*]] = cir.alloca ![[REC_LAM_G2]], !cir.ptr<![[REC_LAM_G2]]>, ["ref.tmp0"] // CIR: %[[G2:.*]] = cir.call @_Z2g2v() : () -> ![[REC_LAM_G2]] // CIR: cir.store{{.*}} %[[G2]], %[[TMP]] // CIR: %[[RESULT:.*]] = cir.call @_ZZ2g2vENK3$_0clEv(%[[TMP]]) -// CIR: cir.yield %[[RESULT]] +// CIR: cir.store{{.*}} %[[RESULT]], %[[RETVAL]] // CIR: } -// CIR: cir.store{{.*}} %[[SCOPE_RET]], %[[RETVAL]] // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RETVAL]] // CIR: cir.return %[[RET]] @@ -255,10 +254,9 @@ int f() { // LLVM: %[[G2:.*]] = call %[[REC_LAM_G2]] @_Z2g2v() // LLVM: store %[[REC_LAM_G2]] %[[G2]], ptr %[[TMP]] // LLVM: %[[RESULT:.*]] = call i32 @"_ZZ2g2vENK3$_0clEv"(ptr %[[TMP]]) +// LLVM: store i32 %[[RESULT]], ptr %[[RETVAL]] // LLVM: br label %[[RET_BB:.*]] // LLVM: [[RET_BB]]: -// LLVM: %[[RETPHI:.*]] = phi i32 [ %[[RESULT]], %[[SCOPE_BB]] ] -// LLVM: store i32 %[[RETPHI]], ptr %[[RETVAL]] // LLVM: %[[RET:.*]] = load i32, ptr %[[RETVAL]] // LLVM: ret i32 %[[RET]] @@ -333,14 +331,13 @@ struct A { // CIR: %[[RETVAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] // CIR: cir.store %[[THIS_ARG]], %[[THIS_ADDR]] // CIR: %[[THIS]] = cir.load deref %[[THIS_ADDR]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A> -// CIR: %[[SCOPE_RET:.*]] = cir.scope { +// CIR: cir.scope { // CIR: %[[LAM_ADDR:.*]] = cir.alloca ![[REC_LAM_A]], !cir.ptr<![[REC_LAM_A]]>, ["ref.tmp0"] // CIR: %[[STRUCT_A:.*]] = cir.get_member %[[LAM_ADDR]][0] {name = "this"} : !cir.ptr<![[REC_LAM_A]]> -> !cir.ptr<!rec_A> // CIR: cir.call @_ZN1AC1ERKS_(%[[STRUCT_A]], %[[THIS]]){{.*}} : (!cir.ptr<!rec_A>, !cir.ptr<!rec_A>){{.*}} -> () // CIR: %[[LAM_RET:.*]] = cir.call @_ZZN1A3fooEvENKUlvE_clEv(%[[LAM_ADDR]]) -// CIR: cir.yield %[[LAM_RET]] +// CIR: cir.store{{.*}} %[[LAM_RET]], %[[RETVAL]] // CIR: } -// CIR: cir.store{{.*}} %[[SCOPE_RET]], %[[RETVAL]] // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RETVAL]] // CIR: cir.return %[[RET]] @@ -355,10 +352,9 @@ struct A { // LLVM: %[[STRUCT_A:.*]] = getelementptr %[[REC_LAM_A]], ptr %[[LAM_ALLOCA]], i32 0, i32 0 // LLVM: call void @_ZN1AC1ERKS_(ptr %[[STRUCT_A]], ptr %[[THIS]]) // LLVM: %[[LAM_RET:.*]] = call i32 @_ZZN1A3fooEvENKUlvE_clEv(ptr %[[LAM_ALLOCA]]) +// LLVM: store i32 %[[LAM_RET]], ptr %[[RETVAL]] // LLVM: br label %[[RET_BB:.*]] // LLVM: [[RET_BB]]: -// LLVM: %[[RETPHI:.*]] = phi i32 [ %[[LAM_RET]], %[[SCOPE_BB]] ] -// LLVM: store i32 %[[RETPHI]], ptr %[[RETVAL]] // LLVM: %[[RET:.*]] = load i32, ptr %[[RETVAL]] // LLVM: ret i32 %[[RET]] @@ -407,14 +403,13 @@ struct A { // CIR: %[[RETVAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] // CIR: cir.store %[[THIS_ARG]], %[[THIS_ADDR]] // CIR: %[[THIS]] = cir.load %[[THIS_ADDR]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A> -// CIR: %[[SCOPE_RET:.*]] = cir.scope { +// CIR: cir.scope { // CIR: %[[LAM_ADDR:.*]] = cir.alloca ![[REC_LAM_PTR_A]], !cir.ptr<![[REC_LAM_PTR_A]]>, ["ref.tmp0"] // CIR: %[[A_ADDR_ADDR:.*]] = cir.get_member %[[LAM_ADDR]][0] {name = "this"} : !cir.ptr<![[REC_LAM_PTR_A]]> -> !cir.ptr<!cir.ptr<!rec_A>> // CIR: cir.store{{.*}} %[[THIS]], %[[A_ADDR_ADDR]] // CIR: %[[LAM_RET:.*]] = cir.call @_ZZN1A3barEvENKUlvE_clEv(%[[LAM_ADDR]]) -// CIR: cir.yield %[[LAM_RET]] +// CIR: cir.store{{.*}} %[[LAM_RET]], %[[RETVAL]] // CIR: } -// CIR: cir.store{{.*}} %[[SCOPE_RET]], %[[RETVAL]] // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RETVAL]] // 
CIR: cir.return %[[RET]] @@ -429,10 +424,9 @@ struct A { // LLVM: %[[A_ADDR_ADDR:.*]] = getelementptr %[[REC_LAM_PTR_A]], ptr %[[LAM_ALLOCA]], i32 0, i32 0 // LLVM: store ptr %[[THIS]], ptr %[[A_ADDR_ADDR]] // LLVM: %[[LAM_RET:.*]] = call i32 @_ZZN1A3barEvENKUlvE_clEv(ptr %[[LAM_ALLOCA]]) +// LLVM: store i32 %[[LAM_RET]], ptr %[[RETVAL]] // LLVM: br label %[[RET_BB:.*]] // LLVM: [[RET_BB]]: -// LLVM: %[[RETPHI:.*]] = phi i32 [ %[[LAM_RET]], %[[SCOPE_BB]] ] -// LLVM: store i32 %[[RETPHI]], ptr %[[RETVAL]] // LLVM: %[[RET:.*]] = load i32, ptr %[[RETVAL]] // LLVM: ret i32 %[[RET]] diff --git a/clang/test/CIR/CodeGen/new.cpp b/clang/test/CIR/CodeGen/new.cpp index 000ea5b..2efad10 100644 --- a/clang/test/CIR/CodeGen/new.cpp +++ b/clang/test/CIR/CodeGen/new.cpp @@ -208,6 +208,127 @@ void t_new_constant_size() { // OGCG: %[[CALL:.*]] = call noalias noundef nonnull ptr @_Znam(i64 noundef 128) // OGCG: store ptr %[[CALL]], ptr %[[P_ADDR]], align 8 +class C { + public: + ~C(); +}; + +void t_constant_size_nontrivial() { + auto p = new C[3]; +} + +// CHECK: cir.func{{.*}} @_Z26t_constant_size_nontrivialv() +// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["p", init] {alignment = 8 : i64} +// CHECK: %[[#NUM_ELEMENTS:]] = cir.const #cir.int<3> : !u64i +// CHECK: %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<3> : !u64i +// CHECK: %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<11> : !u64i +// CHECK: %[[RAW_PTR:.*]] = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void> +// CHECK: %[[COOKIE_PTR_BASE:.*]] = cir.cast bitcast %[[RAW_PTR]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!u8i>> +// CHECK: %[[COOKIE_PTR:.*]] = cir.cast bitcast %[[COOKIE_PTR_BASE]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!u64i> +// CHECK: cir.store align(8) %[[#NUM_ELEMENTS]], %[[COOKIE_PTR]] : !u64i, !cir.ptr<!u64i> +// CHECK: %[[#COOKIE_SIZE:]] = cir.const #cir.int<8> : !s32i +// CHECK: %[[DATA_PTR_RAW:.*]] = cir.ptr_stride %[[COOKIE_PTR_BASE]], %[[#COOKIE_SIZE]] : (!cir.ptr<!cir.ptr<!u8i>>, !s32i) -> !cir.ptr<!cir.ptr<!u8i>> +// CHECK: %[[DATA_PTR_VOID:.*]] = cir.cast bitcast %[[DATA_PTR_RAW]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!void> +// CHECK: %[[DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR_VOID]] : !cir.ptr<!void> -> !cir.ptr<!rec_C> +// CHECK: cir.store align(8) %[[DATA_PTR]], %[[P_ADDR]] : !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>> +// CHECK: cir.return +// CHECK: } + +// LLVM: @_Z26t_constant_size_nontrivialv() +// LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8 +// LLVM: %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 11) +// LLVM: store i64 3, ptr %[[COOKIE_PTR]], align 8 +// LLVM: %[[ALLOCATED_PTR:.*]] = getelementptr ptr, ptr %[[COOKIE_PTR]], i64 8 +// LLVM: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8 + +// OGCG: @_Z26t_constant_size_nontrivialv() +// OGCG: %[[ALLOCA:.*]] = alloca ptr, align 8 +// OGCG: %[[COOKIE_PTR:.*]] = call noalias noundef nonnull ptr @_Znam(i64 noundef 11) +// OGCG: store i64 3, ptr %[[COOKIE_PTR]], align 8 +// OGCG: %[[ALLOCATED_PTR:.*]] = getelementptr inbounds i8, ptr %[[COOKIE_PTR]], i64 8 +// OGCG: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8 + +class D { + public: + int x; + ~D(); +}; + +void t_constant_size_nontrivial2() { + auto p = new D[3]; +} + +// In this test SIZE_WITHOUT_COOKIE isn't used, but it would be if there were +// an initializer. 
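+// A note on the arithmetic in the checks above and below (assuming the
+// Itanium C++ ABI array cookie and an x86-64 data layout): for `new C[3]`,
+// three one-byte elements plus an 8-byte element-count cookie give the
+// 11-byte allocation; here sizeof(D) == 4, so SIZE_WITHOUT_COOKIE is
+// 3 * 4 == 12 and the cookie brings the allocation to 20 bytes.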
+ +// CHECK: cir.func{{.*}} @_Z27t_constant_size_nontrivial2v() +// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr<!rec_D>, !cir.ptr<!cir.ptr<!rec_D>>, ["p", init] {alignment = 8 : i64} +// CHECK: %[[#NUM_ELEMENTS:]] = cir.const #cir.int<3> : !u64i +// CHECK: %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<12> : !u64i +// CHECK: %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<20> : !u64i +// CHECK: %[[RAW_PTR:.*]] = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void> +// CHECK: %[[COOKIE_PTR_BASE:.*]] = cir.cast bitcast %[[RAW_PTR]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!u8i>> +// CHECK: %[[COOKIE_PTR:.*]] = cir.cast bitcast %[[COOKIE_PTR_BASE]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!u64i> +// CHECK: cir.store align(8) %[[#NUM_ELEMENTS]], %[[COOKIE_PTR]] : !u64i, !cir.ptr<!u64i> +// CHECK: %[[#COOKIE_SIZE:]] = cir.const #cir.int<8> : !s32i +// CHECK: %[[DATA_PTR_RAW:.*]] = cir.ptr_stride %[[COOKIE_PTR_BASE]], %[[#COOKIE_SIZE]] : (!cir.ptr<!cir.ptr<!u8i>>, !s32i) -> !cir.ptr<!cir.ptr<!u8i>> +// CHECK: %[[DATA_PTR_VOID:.*]] = cir.cast bitcast %[[DATA_PTR_RAW]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!void> +// CHECK: %[[DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR_VOID]] : !cir.ptr<!void> -> !cir.ptr<!rec_D> +// CHECK: cir.store align(8) %[[DATA_PTR]], %[[P_ADDR]] : !cir.ptr<!rec_D>, !cir.ptr<!cir.ptr<!rec_D>> +// CHECK: cir.return +// CHECK: } + +// LLVM: @_Z27t_constant_size_nontrivial2v() +// LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8 +// LLVM: %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 20) +// LLVM: store i64 3, ptr %[[COOKIE_PTR]], align 8 +// LLVM: %[[ALLOCATED_PTR:.*]] = getelementptr ptr, ptr %[[COOKIE_PTR]], i64 8 +// LLVM: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8 + +struct alignas(16) E { + int x; + ~E(); +}; + +void t_align16_nontrivial() { + auto p = new E[2]; +} + +// CHECK: cir.func{{.*}} @_Z20t_align16_nontrivialv() +// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr<!rec_E>, !cir.ptr<!cir.ptr<!rec_E>>, ["p", init] {alignment = 8 : i64} +// CHECK: %[[#NUM_ELEMENTS:]] = cir.const #cir.int<2> : !u64i +// CHECK: %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<32> : !u64i +// CHECK: %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<48> : !u64i +// CHECK: %[[RAW_PTR:.*]] = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void> +// CHECK: %[[COOKIE_PTR_BASE:.*]] = cir.cast bitcast %[[RAW_PTR]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!u8i>> +// CHECK: %[[COOKIE_OFFSET:.*]] = cir.const #cir.int<8> : !s32i +// CHECK: %[[COOKIE_PTR_RAW:.*]] = cir.ptr_stride %[[COOKIE_PTR_BASE]], %[[COOKIE_OFFSET]] : (!cir.ptr<!cir.ptr<!u8i>>, !s32i) -> !cir.ptr<!cir.ptr<!u8i>> +// CHECK: %[[COOKIE_PTR:.*]] = cir.cast bitcast %[[COOKIE_PTR_RAW]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!u64i> +// CHECK: cir.store align(8) %[[#NUM_ELEMENTS]], %[[COOKIE_PTR]] : !u64i, !cir.ptr<!u64i> +// CHECK: %[[#COOKIE_SIZE:]] = cir.const #cir.int<16> : !s32i +// CHECK: %[[DATA_PTR_RAW:.*]] = cir.ptr_stride %[[COOKIE_PTR_BASE]], %[[#COOKIE_SIZE]] : (!cir.ptr<!cir.ptr<!u8i>>, !s32i) -> !cir.ptr<!cir.ptr<!u8i>> +// CHECK: %[[DATA_PTR_VOID:.*]] = cir.cast bitcast %[[DATA_PTR_RAW]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!void> +// CHECK: %[[DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR_VOID]] : !cir.ptr<!void> -> !cir.ptr<!rec_E> +// CHECK: cir.store align(8) %[[DATA_PTR]], %[[P_ADDR]] : !cir.ptr<!rec_E>, !cir.ptr<!cir.ptr<!rec_E>> +// CHECK: cir.return +// CHECK: } + +// LLVM: @_Z20t_align16_nontrivialv() +// LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8 +// LLVM: %[[RAW_PTR:.*]] = call ptr 
@_Znam(i64 48) +// LLVM: %[[COOKIE_PTR:.*]] = getelementptr ptr, ptr %[[RAW_PTR]], i64 8 +// LLVM: store i64 2, ptr %[[COOKIE_PTR]], align 8 +// LLVM: %[[ALLOCATED_PTR:.*]] = getelementptr ptr, ptr %[[RAW_PTR]], i64 16 +// LLVM: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8 + +// OGCG: define{{.*}} void @_Z20t_align16_nontrivialv +// OGCG: %[[ALLOCA:.*]] = alloca ptr, align 8 +// OGCG: %[[RAW_PTR:.*]] = call noalias noundef nonnull ptr @_Znam(i64 noundef 48) +// OGCG: %[[COOKIE_PTR:.*]] = getelementptr inbounds i8, ptr %[[RAW_PTR]], i64 8 +// OGCG: store i64 2, ptr %[[COOKIE_PTR]], align 8 +// OGCG: %[[ALLOCATED_PTR:.*]] = getelementptr inbounds i8, ptr %[[RAW_PTR]], i64 16 +// OGCG: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8 +// OGCG: ret void void t_new_multidim_constant_size() { auto p = new double[2][3][4]; diff --git a/clang/test/CIR/CodeGen/statement-exprs.c b/clang/test/CIR/CodeGen/statement-exprs.c index f6ec9ec..c784ec9 100644 --- a/clang/test/CIR/CodeGen/statement-exprs.c +++ b/clang/test/CIR/CodeGen/statement-exprs.c @@ -218,7 +218,7 @@ struct S { int x; }; int test3() { return ({ struct S s = {1}; s; }).x; } // CIR: cir.func no_proto dso_local @test3() -> !s32i // CIR: %[[RETVAL:.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] -// CIR: %[[YIELDVAL:.+]] = cir.scope { +// CIR: cir.scope { // CIR: %[[REF_TMP0:.+]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["ref.tmp0"] // CIR: %[[TMP:.+]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["tmp"] // CIR: cir.scope { @@ -230,9 +230,8 @@ int test3() { return ({ struct S s = {1}; s; }).x; } // CIR: } // CIR: %[[GEP_X_TMP:.+]] = cir.get_member %[[REF_TMP0]][0] {name = "x"} : !cir.ptr<!rec_S> -> !cir.ptr<!s32i> // CIR: %[[XVAL:.+]] = cir.load {{.*}} %[[GEP_X_TMP]] : !cir.ptr<!s32i>, !s32i -// CIR: cir.yield %[[XVAL]] : !s32i -// CIR: } : !s32i -// CIR: cir.store %[[YIELDVAL]], %[[RETVAL]] : !s32i, !cir.ptr<!s32i> +// CIR: cir.store{{.*}} %[[XVAL]], %[[RETVAL]] : !s32i, !cir.ptr<!s32i> +// CIR: } // CIR: %[[RES:.+]] = cir.load %[[RETVAL]] : !cir.ptr<!s32i>, !s32i // CIR: cir.return %[[RES]] : !s32i @@ -252,10 +251,9 @@ int test3() { return ({ struct S s = {1}; s; }).x; } // LLVM: [[LBL8]]: // LLVM: %[[GEP_VAR1:.+]] = getelementptr %struct.S, ptr %[[VAR1]], i32 0, i32 0 // LLVM: %[[LOAD_X:.+]] = load i32, ptr %[[GEP_VAR1]] +// LLVM: store i32 %[[LOAD_X]], ptr %[[VAR4]] // LLVM: br label %[[LBL11:.+]] // LLVM: [[LBL11]]: -// LLVM: %[[PHI:.+]] = phi i32 [ %[[LOAD_X]], %[[LBL8]] ] -// LLVM: store i32 %[[PHI]], ptr %[[VAR4]] // LLVM: %[[RES:.+]] = load i32, ptr %[[VAR4]] // LLVM: ret i32 %[[RES]] diff --git a/clang/test/CIR/CodeGen/struct.cpp b/clang/test/CIR/CodeGen/struct.cpp index 6d362c7..c8db714 100644 --- a/clang/test/CIR/CodeGen/struct.cpp +++ b/clang/test/CIR/CodeGen/struct.cpp @@ -280,3 +280,67 @@ void bin_comma() { // OGCG: define{{.*}} void @_Z9bin_commav() // OGCG: %[[A_ADDR:.*]] = alloca %struct.CompleteS, align 4 // OGCG: call void @llvm.memset.p0.i64(ptr align 4 %[[A_ADDR]], i8 0, i64 8, i1 false) + +void compound_literal_expr() { CompleteS a = (CompleteS){}; } + +// CIR: %[[A_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["a", init] +// CIR: %[[A_ELEM_0_PTR:.*]] = cir.get_member %[[A_ADDR]][0] {name = "a"} : !cir.ptr<!rec_CompleteS> -> !cir.ptr<!s32i> +// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store{{.*}} %[[CONST_0]], %[[A_ELEM_0_PTR]] : !s32i, !cir.ptr<!s32i> +// CIR: %[[A_ELEM_1_PTR:.*]] = cir.get_member %[[A_ADDR]][1] {name = "b"} : !cir.ptr<!rec_CompleteS> -> 
!cir.ptr<!s8i> +// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s8i +// CIR: cir.store{{.*}} %[[CONST_0]], %[[A_ELEM_1_PTR]] : !s8i, !cir.ptr<!s8i> + +// TODO(cir): zero-initialize the padding + +// LLVM: %[[A_ADDR:.*]] = alloca %struct.CompleteS, i64 1, align 4 +// LLVM: %[[A_ELEM_0_PTR:.*]] = getelementptr %struct.CompleteS, ptr %[[A_ADDR]], i32 0, i32 0 +// LLVM: store i32 0, ptr %[[A_ELEM_0_PTR]], align 4 +// LLVM: %[[A_ELEM_1_PTR:.*]] = getelementptr %struct.CompleteS, ptr %[[A_ADDR]], i32 0, i32 1 +// LLVM: store i8 0, ptr %[[A_ELEM_1_PTR]], align 4 + +// OGCG: %[[A_ADDR:.*]] = alloca %struct.CompleteS, align 4 +// OGCG: call void @llvm.memset.p0.i64(ptr align 4 %[[A_ADDR]], i8 0, i64 8, i1 false) + +struct StructWithConstMember { + int a : 1; +}; + +void struct_with_const_member_expr() { + int a = (StructWithConstMember){}.a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: %[[RESULT:.*]] = cir.scope { +// CIR: %[[REF_ADDR:.*]] = cir.alloca !rec_StructWithConstMember, !cir.ptr<!rec_StructWithConstMember>, ["ref.tmp0"] +// CIR: %[[ELEM_0_PTR:.*]] = cir.get_member %[[REF_ADDR]][0] {name = "a"} : !cir.ptr<!rec_StructWithConstMember> -> !cir.ptr<!u8i> +// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s32i +// CIR: %[[SET_BF:.*]] = cir.set_bitfield{{.*}} (#bfi_a, %[[ELEM_0_PTR]] : !cir.ptr<!u8i>, %[[CONST_0]] : !s32i) -> !s32i +// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.yield %[[CONST_0]] : !s32i +// CIR: } : !s32i +// CIR: cir.store{{.*}} %[[RESULT]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i> + +// TODO(cir): zero-initialize the padding + +// LLVM: %[[REF_ADDR:.*]] = alloca %struct.StructWithConstMember, i64 1, align 4 +// LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4 +// LLVM: br label %[[BF_LABEL:.*]] +// LLVM: [[BF_LABEL]]: +// LLVM: %[[ELEM_0_PTR:.*]] = getelementptr %struct.StructWithConstMember, ptr %[[REF_ADDR]], i32 0, i32 0 +// LLVM: %[[TMP_REF:.*]] = load i8, ptr %[[ELEM_0_PTR]], align 4 +// LLVM: %[[BF_CLEAR:.*]] = and i8 %[[TMP_REF]], -2 +// LLVM: %[[BF_SET:.*]] = or i8 %[[BF_CLEAR]], 0 +// LLVM: store i8 %[[BF_SET]], ptr %[[ELEM_0_PTR]], align 4 +// LLVM: br label %[[RESULT_LABEL:.*]] +// LLVM: [[RESULT_LABEL]]: +// LLVM: %[[RESULT:.*]] = phi i32 [ 0, %[[BF_LABEL]] ] +// LLVM: store i32 %[[RESULT]], ptr %[[A_ADDR]], align 4 + +// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 +// OGCG: %[[REF_ADDR:.*]] = alloca %struct.StructWithConstMember, align 4 +// OGCG: %[[TMP_REF:.*]] = load i8, ptr %[[REF_ADDR]], align 4 +// OGCG: %[[BF_CLEAR:.*]] = and i8 %[[TMP_REF]], -2 +// OGCG: %[[BF_SET:.*]] = or i8 %[[BF_CLEAR]], 0 +// OGCG: store i8 %[[BF_SET]], ptr %[[REF_ADDR]], align 4 +// OGCG: store i32 0, ptr %[[A_ADDR]], align 4 diff --git a/clang/test/CIR/CodeGen/try-catch.cpp b/clang/test/CIR/CodeGen/try-catch.cpp index 8f0b3c4..5a50310 100644 --- a/clang/test/CIR/CodeGen/try-catch.cpp +++ b/clang/test/CIR/CodeGen/try-catch.cpp @@ -30,3 +30,90 @@ void empty_try_block_with_catch_with_int_exception() { // OGCG: define{{.*}} void @_Z45empty_try_block_with_catch_with_int_exceptionv() // OGCG: ret void + +void try_catch_with_empty_catch_all() { + int a = 1; + try { + return; + ++a; + } catch (...) 
{ + } +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store{{.*}} %[[CONST_1]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i +// CIR: cir.scope { +// CIR: cir.try { +// CIR: cir.return +// CIR: ^bb1: // no predecessors +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!s32i>, !s32i +// CIR: %[[RESULT:.*]] = cir.unary(inc, %[[TMP_A]]) nsw : !s32i, !s32i +// CIR: cir.store{{.*}} %[[RESULT]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i> +// CIR: cir.yield +// CIR: } +// CIR: } + +// LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4 +// LLVM: store i32 1, ptr %[[A_ADDR]], align 4 +// LLVM: br label %[[BB_2:.*]] +// LLVM: [[BB_2]]: +// LLVM: br label %[[BB_3:.*]] +// LLVM: [[BB_3]]: +// LLVM: ret void +// LLVM: [[BB_4:.*]]: +// LLVM: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4 +// LLVM: %[[RESULT:.*]] = add nsw i32 %[[TMP_A]], 1 +// LLVM: store i32 %[[RESULT]], ptr %[[A_ADDR]], align 4 +// LLVM: br label %[[BB_7:.*]] +// LLVM: [[BB_7]]: +// LLVM: br label %[[BB_8:.*]] +// LLVM: [[BB_8]]: +// LLVM: ret void + +// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 +// OGCG: store i32 1, ptr %[[A_ADDR]], align 4 +// OGCG: ret void + +void try_catch_with_empty_catch_all_2() { + int a = 1; + try { + ++a; + return; + } catch (...) { + } +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] +// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store{{.*}} %[[CONST_1]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i> +// CIR: cir.scope { +// CIR: cir.try { +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!s32i>, !s32i +// CIR: %[[RESULT:.*]] = cir.unary(inc, %[[TMP_A]]) nsw : !s32i, !s32i +// CIR: cir.store{{.*}} %[[RESULT]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i> +// CIR: cir.return +// CIR: } +// CIR: } + +// LLVM: %[[A_ADDR]] = alloca i32, i64 1, align 4 +// LLVM: store i32 1, ptr %[[A_ADDR]], align 4 +// LLVM: br label %[[BB_2:.*]] +// LLVM: [[BB_2]]: +// LLVM: br label %[[BB_3:.*]] +// LLVM: [[BB_3]]: +// LLVM: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4 +// LLVM: %[[RESULT:.*]] = add nsw i32 %[[TMP_A:.*]], 1 +// LLVM: store i32 %[[RESULT]], ptr %[[A_ADDR]], align 4 +// LLVM: ret void +// LLVM: [[BB_6:.*]]: +// LLVM: br label %[[BB_7:.*]] +// LLVM: [[BB_7]]: +// LLVM: ret void + +// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 +// OGCG: store i32 1, ptr %[[A_ADDR]], align 4 +// OGCG: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4 +// OGCG: %[[RESULT:.*]] = add nsw i32 %[[TMP_A]], 1 +// OGCG: store i32 %[[RESULT]], ptr %[[A_ADDR]], align 4 +// OGCG: ret void diff --git a/clang/test/CIR/CodeGen/vla.c b/clang/test/CIR/CodeGen/vla.c index b22c704..0af4f83 100644 --- a/clang/test/CIR/CodeGen/vla.c +++ b/clang/test/CIR/CodeGen/vla.c @@ -282,4 +282,61 @@ void f3(unsigned len) { // break; // } // } -
\ No newline at end of file + +int f5(unsigned long len) { + int arr[len]; + return arr[2]; +} + +// CIR: cir.func{{.*}} @f5(%[[LEN_ARG:.*]]: !u64i {{.*}}) -> !s32i +// CIR: %[[LEN_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["len", init] +// CIR: %[[RET_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] +// CIR: %[[SAVED_STACK:.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"] +// CIR: cir.store{{.*}} %[[LEN_ARG]], %[[LEN_ADDR]] +// CIR: %[[LEN:.*]] = cir.load{{.*}} %[[LEN_ADDR]] +// CIR: %[[STACK_PTR:.*]] = cir.stacksave +// CIR: cir.store{{.*}} %[[STACK_PTR]], %[[SAVED_STACK]] +// CIR: %[[ARR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, %[[LEN]] : !u64i, ["arr"] +// CIR: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i +// CIR: %[[ARR_2:.*]] = cir.ptr_stride %[[ARR]], %[[TWO]] +// CIR: %[[ARR_VAL:.*]] = cir.load{{.*}} %[[ARR_2]] : !cir.ptr<!s32i>, !s32i +// CIR: cir.store{{.*}} %[[ARR_VAL]], %[[RET_ADDR]] : !s32i, !cir.ptr<!s32i> +// CIR: %[[STACK_RESTORE_PTR:.*]] = cir.load{{.*}} %[[SAVED_STACK]] +// CIR: cir.stackrestore %[[STACK_RESTORE_PTR]] +// CIR: %[[RET_VAL:.*]] = cir.load{{.*}} %[[RET_ADDR]] +// CIR: cir.return %[[RET_VAL]] : !s32i + +// LLVM: define{{.*}} i32 @f5(i64 %[[LEN_ARG:.*]]) +// LLVM: %[[LEN_ADDR:.*]] = alloca i64 +// LLVM: %[[RET_ADDR:.*]] = alloca i32 +// LLVM: %[[SAVED_STACK:.*]] = alloca ptr +// LLVM: store i64 %[[LEN_ARG]], ptr %[[LEN_ADDR]] +// LLVM: %[[LEN:.*]] = load i64, ptr %[[LEN_ADDR]] +// LLVM: %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0() +// LLVM: store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]] +// LLVM: %[[ARR:.*]] = alloca i32, i64 %[[LEN]] +// LLVM: %[[ARR_2:.*]] = getelementptr i32, ptr %[[ARR]], i64 2 +// LLVM: %[[ARR_VAL:.*]] = load i32, ptr %[[ARR_2]] +// LLVM: store i32 %[[ARR_VAL]], ptr %[[RET_ADDR]] +// LLVM: %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]] +// LLVM: call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]]) +// LLVM: %[[RET_VAL:.*]] = load i32, ptr %[[RET_ADDR]] +// LLVM: ret i32 %[[RET_VAL]] + +// Note: VLA_EXPR0 below is emitted to capture debug info. 
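+// A brief reading of the checks above: the llvm.stacksave/llvm.stackrestore
+// pair brackets the VLA's lifetime, so the dynamically sized `alloca i32,
+// i64 %len` is reclaimed before the function returns.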
+ +// OGCG: define{{.*}} i32 @f5(i64 {{.*}} %[[LEN_ARG:.*]]) +// OGCG: %[[LEN_ADDR:.*]] = alloca i64 +// OGCG: %[[SAVED_STACK:.*]] = alloca ptr +// OGCG: %[[VLA_EXPR0:.*]] = alloca i64 +// OGCG: store i64 %[[LEN_ARG]], ptr %[[LEN_ADDR]] +// OGCG: %[[LEN:.*]] = load i64, ptr %[[LEN_ADDR]] +// OGCG: %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0() +// OGCG: store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]] +// OGCG: %[[ARR:.*]] = alloca i32, i64 %[[LEN]] +// OGCG: store i64 %[[LEN]], ptr %[[VLA_EXPR0]] +// OGCG: %[[ARR_2:.*]] = getelementptr inbounds i32, ptr %[[ARR]], i64 2 +// OGCG: %[[ARR_VAL:.*]] = load i32, ptr %[[ARR_2]] +// OGCG: %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]] +// OGCG: call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]]) +// OGCG: ret i32 %[[ARR_VAL]] diff --git a/clang/test/CIR/IR/invalid-atomic.cir b/clang/test/CIR/IR/invalid-atomic.cir new file mode 100644 index 0000000..a124e43 --- /dev/null +++ b/clang/test/CIR/IR/invalid-atomic.cir @@ -0,0 +1,7 @@ +// RUN: cir-opt %s -verify-diagnostics -split-input-file + +cir.func @f1(%arg0: !cir.ptr<!cir.float>, %arg1: !cir.float) { + // expected-error @below {{only atomic add, sub, max, and min operation could operate on floating-point values}} + %0 = cir.atomic.fetch and seq_cst %arg0, %arg1 : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float + cir.return +} diff --git a/clang/test/Driver/arm-abi.c b/clang/test/Driver/arm-abi.c index 139456c..b89b969 100644 --- a/clang/test/Driver/arm-abi.c +++ b/clang/test/Driver/arm-abi.c @@ -31,6 +31,8 @@ // FreeBSD / OpenBSD default to aapcs-linux // RUN: %clang -target arm--freebsd- %s -### -o %t.o 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-AAPCS-LINUX %s +// RUN: %clang -target arm--fuchsia- %s -### -o %t.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-AAPCS-LINUX %s // RUN: %clang -target arm--openbsd- %s -### -o %t.o 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-AAPCS-LINUX %s // RUN: %clang -target arm--haiku- %s -### -o %t.o 2>&1 \ diff --git a/clang/test/Driver/fuchsia.c b/clang/test/Driver/fuchsia.c index cf92f85..3fb2a94 100644 --- a/clang/test/Driver/fuchsia.c +++ b/clang/test/Driver/fuchsia.c @@ -2,6 +2,10 @@ // RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ // RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK,CHECK-X86_64 %s +// RUN: %clang -### %s --target=arm-unknown-fuchsia \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ +// RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHECK,CHECK-ARMV8A %s // RUN: %clang -### %s --target=aarch64-unknown-fuchsia \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ // RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \ @@ -14,6 +18,10 @@ // RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ // RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK,CHECK-X86_64 %s +// RUN: %clang -### %s --target=arm-fuchsia \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ +// RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHECK,CHECK-ARMV8A %s // RUN: %clang -### %s --target=aarch64-fuchsia \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ // RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \ @@ -24,6 +32,7 @@ // RUN: | FileCheck -check-prefixes=CHECK,CHECK-RISCV64 %s // CHECK: "-cc1" // CHECK-X86_64: "-triple" "x86_64-unknown-fuchsia" +// CHECK-ARMV8A: "-triple" 
"thumbv8a-unknown-fuchsia" // CHECK-AARCH64: "-triple" "aarch64-unknown-fuchsia" // CHECK-RISCV64: "-triple" "riscv64-unknown-fuchsia" // CHECK: "-funwind-tables=2" diff --git a/clang/test/Preprocessor/riscv-atomics.c b/clang/test/Preprocessor/riscv-atomics.c new file mode 100644 index 0000000..6e02173 --- /dev/null +++ b/clang/test/Preprocessor/riscv-atomics.c @@ -0,0 +1,24 @@ +// RUN: %clang --target=riscv32-unknown-linux-gnu -march=rv32ia -x c -E -dM %s \ +// RUN: -o - | FileCheck %s +// RUN: %clang --target=riscv32-unknown-linux-gnu -march=rv32i_zalrsc -x c -E \ +// RUN: -dM %s -o - | FileCheck %s +// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64ia -x c -E -dM %s \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,CHECK-RV64 +// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64i_zalrsc -x c -E \ +// RUN: -dM %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-RV64 + +// CHECK: #define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +// CHECK: #define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +// CHECK: #define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +// CHECK: #define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +// CHECK: #define __GCC_ATOMIC_INT_LOCK_FREE 2 +// CHECK-RV64: #define __GCC_ATOMIC_LLONG_LOCK_FREE 2 +// CHECK: #define __GCC_ATOMIC_LONG_LOCK_FREE 2 +// CHECK: #define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +// CHECK: #define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +// CHECK: #define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +// CHECK: #define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 +// CHECK: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// CHECK: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +// CHECK: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +// CHECK-RV64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp index 4827cc1..cae9265 100644 --- a/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp +++ b/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp @@ -2453,6 +2453,167 @@ TEST_P(UncheckedStatusOrAccessModelTest, SubclassOperator) { )cc"); } +TEST_P(UncheckedStatusOrAccessModelTest, UnwrapValueWithStatusCheck) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor) { + if (sor.status().ok()) + sor.value(); + else + sor.value(); // [[unsafe]] + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, UnwrapValueWithStatusRefCheck) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor) { + const STATUS& s = sor.status(); + if (s.ok()) + sor.value(); + else + sor.value(); // [[unsafe]] + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, UnwrapValueWithStatusPtrCheck) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor) { + const STATUS* s = &sor.status(); + if (s->ok()) + sor.value(); + else + sor.value(); // [[unsafe]] + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, UnwrapValueWithMovedStatus) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor) { + if (std::move(sor.status()).ok()) + sor.value(); + else + sor.value(); // [[unsafe]] + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, MembersUsedInsideStatus) { + ExpectDiagnosticsFor(R"cc( + namespace absl { + + class Status { + public: + bool ok() const; + + void 
target() const { ok(); } + }; + + } // namespace absl + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, StatusUpdate) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor) { + STATUS s; + s.Update(sor.status()); + if (s.ok()) + sor.value(); + else + sor.value(); // [[unsafe]] + } + )cc"); + + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor1, STATUSOR_INT sor2) { + STATUS s; + s.Update(sor1.status()); + s.Update(sor2.status()); + if (s.ok()) { + sor1.value(); + sor2.value(); + } + } + )cc"); + + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor1, STATUSOR_INT sor2) { + STATUS s; + s.Update(sor1.status()); + CHECK(s.ok()); + s.Update(sor2.status()); + sor1.value(); + sor2.value(); // [[unsafe]] + } + )cc"); + + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor1, STATUSOR_INT sor2) { + STATUS s; + s.Update(sor1.status()); + CHECK(s.ok()); + sor1.value(); + sor2.value(); // [[unsafe]] + } + )cc"); + + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor1, STATUSOR_INT sor2) { + STATUS s; + STATUS sor1_status = sor1.status(); + s.Update(std::move(sor1_status)); + CHECK(s.ok()); + sor1.value(); + sor2.value(); // [[unsafe]] + } + )cc"); + + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(STATUSOR_INT sor1, STATUSOR_INT sor2) { + STATUS s; + STATUS sor1_status = sor1.status(); + sor1_status.Update(sor2.status()); + s.Update(std::move(sor1_status)); + CHECK(s.ok()); + sor1.value(); + sor2.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + const STATUS& OptStatus(); + + void target(STATUSOR_INT sor) { + auto s = sor.status(); + s.Update(OptStatus()); + if (s.ok()) sor.value(); + } + )cc"); +} + } // namespace std::string diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp index fc80bf4..6b433bb 100644 --- a/clang/unittests/Format/FormatTestComments.cpp +++ b/clang/unittests/Format/FormatTestComments.cpp @@ -839,6 +839,25 @@ TEST_F(FormatTestComments, MultiLineCommentsInDefines) { getLLVMStyleWithColumns(17))); } +TEST_F(FormatTestComments, LineCommentsInMacrosDoNotGetEscapedNewlines) { + FormatStyle Style = getLLVMStyleWithColumns(0); + Style.ReflowComments = FormatStyle::RCS_Never; + verifyFormat("#define FOO (1U) // comment\n" + " // comment", + Style); + + Style.ColumnLimit = 32; + verifyFormat("#define SOME_MACRO(x) x\n" + "#define FOO \\\n" + " SOME_MACRO(1) + \\\n" + " SOME_MACRO(2) // comment\n" + " // comment", + "#define SOME_MACRO(x) x\n" + "#define FOO SOME_MACRO(1) + SOME_MACRO(2) // comment\n" + " // comment", + Style); +} + TEST_F(FormatTestComments, ParsesCommentsAdjacentToPPDirectives) { EXPECT_EQ("namespace {}\n// Test\n#define A", format("namespace {}\n // Test\n#define A")); diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index b86bb1b..eaff813 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -117,14 +117,22 @@ include(CompilerRTDarwinUtils) if(APPLE) find_darwin_sdk_dir(DARWIN_osx_SYSROOT macosx) - find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator) - find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos) - 
find_darwin_sdk_dir(DARWIN_watchossim_SYSROOT watchsimulator) - find_darwin_sdk_dir(DARWIN_watchos_SYSROOT watchos) - find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator) - find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos) - find_darwin_sdk_dir(DARWIN_xrossim_SYSROOT xrsimulator) - find_darwin_sdk_dir(DARWIN_xros_SYSROOT xros) + if(COMPILER_RT_ENABLE_IOS) + find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator) + find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos) + endif() + if(COMPILER_RT_ENABLE_WATCHOS) + find_darwin_sdk_dir(DARWIN_watchossim_SYSROOT watchsimulator) + find_darwin_sdk_dir(DARWIN_watchos_SYSROOT watchos) + endif() + if(COMPILER_RT_ENABLE_TVOS) + find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator) + find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos) + endif() + if(COMPILER_RT_ENABLE_XROS) + find_darwin_sdk_dir(DARWIN_xrossim_SYSROOT xrsimulator) + find_darwin_sdk_dir(DARWIN_xros_SYSROOT xros) + endif() # Get supported architecture from SDKSettings. function(sdk_has_arch_support sdk_path os arch has_support) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 67db438..8dfbdec 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -408,12 +408,18 @@ if(APPLE) include(CompilerRTDarwinUtils) find_darwin_sdk_dir(DARWIN_osx_SYSROOT macosx) - find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator) - find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos) - find_darwin_sdk_dir(DARWIN_watchossim_SYSROOT watchsimulator) - find_darwin_sdk_dir(DARWIN_watchos_SYSROOT watchos) - find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator) - find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos) + if(COMPILER_RT_ENABLE_IOS) + find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator) + find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos) + endif() + if(COMPILER_RT_ENABLE_WATCHOS) + find_darwin_sdk_dir(DARWIN_watchossim_SYSROOT watchsimulator) + find_darwin_sdk_dir(DARWIN_watchos_SYSROOT watchos) + endif() + if(COMPILER_RT_ENABLE_TVOS) + find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator) + find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos) + endif() if(NOT DARWIN_osx_SYSROOT) message(WARNING "Could not determine OS X sysroot, trying /usr/include") diff --git a/compiler-rt/lib/asan/asan_rtl_x86_64.S b/compiler-rt/lib/asan/asan_rtl_x86_64.S index 9c52898..5ee830d 100644 --- a/compiler-rt/lib/asan/asan_rtl_x86_64.S +++ b/compiler-rt/lib/asan/asan_rtl_x86_64.S @@ -5,6 +5,7 @@ #include "sanitizer_common/sanitizer_platform.h" .file "asan_rtl_x86_64.S" +.att_syntax #define NAME(n, reg, op, s, i) n##_##op##_##i##_##s##_##reg diff --git a/compiler-rt/lib/builtins/assembly.h b/compiler-rt/lib/builtins/assembly.h index d1e5328..79a45d91 100644 --- a/compiler-rt/lib/builtins/assembly.h +++ b/compiler-rt/lib/builtins/assembly.h @@ -337,4 +337,8 @@ #endif #endif +#if defined(__i386__) || defined(__amd64__) +.att_syntax +#endif + #endif // COMPILERRT_ASSEMBLY_H diff --git a/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S b/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S index 9804e8d..a5379d3 100644 --- a/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S +++ b/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S @@ -30,6 +30,7 @@ .section .text .file "hwasan_setjmp_x86_64.S" +.att_syntax .global ASM_WRAPPER_NAME(setjmp) ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(setjmp)) diff --git a/compiler-rt/lib/orc/elfnix_tls.x86-64.S b/compiler-rt/lib/orc/elfnix_tls.x86-64.S index b3e0bef..da20212 100644 --- a/compiler-rt/lib/orc/elfnix_tls.x86-64.S 
+++ b/compiler-rt/lib/orc/elfnix_tls.x86-64.S @@ -13,6 +13,7 @@ // The content of this file is x86_64-only #if defined(__x86_64__) +.att_syntax #define REGISTER_SAVE_SPACE_SIZE 512 diff --git a/compiler-rt/lib/orc/sysv_reenter.x86-64.S b/compiler-rt/lib/orc/sysv_reenter.x86-64.S index 0a36280..99615c0 100644 --- a/compiler-rt/lib/orc/sysv_reenter.x86-64.S +++ b/compiler-rt/lib/orc/sysv_reenter.x86-64.S @@ -12,6 +12,7 @@ // The content of this file is x86_64-only #if defined(__x86_64__) +.att_syntax // Save all GRPS except %rsp. // This value is also subtracted from %rsp below, despite the fact that %rbp diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S index c633014..5ef090c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S @@ -2,6 +2,8 @@ #include "sanitizer_common/sanitizer_asm.h" +.att_syntax + .comm _ZN14__interception10real_vforkE,4,4 .globl ASM_WRAPPER_NAME(vfork) ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork)) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S index 5500f81..9c85407 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S @@ -2,6 +2,8 @@ #include "sanitizer_common/sanitizer_asm.h" +.att_syntax + .comm _ZN14__interception10real_vforkE,8,8 .globl ASM_WRAPPER_NAME(vfork) ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork)) diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S index f848be9..8b9b706 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S @@ -3,6 +3,8 @@ #include "sanitizer_common/sanitizer_asm.h" +.att_syntax + #if !defined(__APPLE__) .section .text #else diff --git a/flang-rt/lib/cuda/kernel.cpp b/flang-rt/lib/cuda/kernel.cpp index c52d039..296f4b7 100644 --- a/flang-rt/lib/cuda/kernel.cpp +++ b/flang-rt/lib/cuda/kernel.cpp @@ -23,9 +23,9 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY, gridDim.y = gridY; gridDim.z = gridZ; dim3 blockDim; - blockDim.x = blockX > 1024 ? 1024 : blockX; - blockDim.y = blockY > 1024 ? 1024 : blockY; - blockDim.z = blockZ > 64 ? 64 : blockZ; + blockDim.x = blockX; + blockDim.y = blockY; + blockDim.z = blockZ; unsigned nbNegGridDim{0}; if (gridX < 0) { ++nbNegGridDim; @@ -76,8 +76,8 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY, terminator.Crash("Too many invalid grid dimensions"); } cudaStream_t defaultStream = 0; - CUDA_REPORT_IF_ERROR(cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, - stream != nullptr ? (cudaStream_t)(*stream) : defaultStream)); + cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, + stream != nullptr ? (cudaStream_t)(*stream) : defaultStream); } void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX, @@ -88,9 +88,9 @@ void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX, config.gridDim.x = gridX; config.gridDim.y = gridY; config.gridDim.z = gridZ; - config.blockDim.x = blockX > 1024 ? 1024 : blockX; - config.blockDim.y = blockY > 1024 ? 1024 : blockY; - config.blockDim.z = blockZ > 64 ? 
64 : blockZ; + config.blockDim.x = blockX; + config.blockDim.y = blockY; + config.blockDim.z = blockZ; unsigned nbNegGridDim{0}; if (gridX < 0) { ++nbNegGridDim; @@ -153,7 +153,7 @@ void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX, launchAttr[0].val.clusterDim.z = clusterZ; config.numAttrs = 1; config.attrs = launchAttr; - CUDA_REPORT_IF_ERROR(cudaLaunchKernelExC(&config, kernel, params)); + cudaLaunchKernelExC(&config, kernel, params); } void RTDEF(CUFLaunchCooperativeKernel)(const void *kernel, intptr_t gridX, @@ -165,9 +165,9 @@ void RTDEF(CUFLaunchCooperativeKernel)(const void *kernel, intptr_t gridX, gridDim.y = gridY; gridDim.z = gridZ; dim3 blockDim; - blockDim.x = blockX > 1024 ? 1024 : blockX; - blockDim.y = blockY > 1024 ? 1024 : blockY; - blockDim.z = blockZ > 64 ? 64 : blockZ; + blockDim.x = blockX; + blockDim.y = blockY; + blockDim.z = blockZ; unsigned nbNegGridDim{0}; if (gridX < 0) { ++nbNegGridDim; @@ -218,8 +218,8 @@ void RTDEF(CUFLaunchCooperativeKernel)(const void *kernel, intptr_t gridX, terminator.Crash("Too many invalid grid dimensions"); } cudaStream_t defaultStream = 0; - CUDA_REPORT_IF_ERROR(cudaLaunchCooperativeKernel(kernel, gridDim, blockDim, - params, smem, stream != nullptr ? (cudaStream_t)*stream : defaultStream)); + cudaLaunchCooperativeKernel(kernel, gridDim, blockDim, params, smem, + stream != nullptr ? (cudaStream_t)*stream : defaultStream); } } // extern "C" diff --git a/flang/docs/FortranLLVMTestSuite.md b/flang/docs/FortranLLVMTestSuite.md index 17083b4..8d9daa4 100644 --- a/flang/docs/FortranLLVMTestSuite.md +++ b/flang/docs/FortranLLVMTestSuite.md @@ -73,5 +73,3 @@ instructions described [above](#running-the-llvm-test-suite-with-fortran). There are additional configure-time options that can be used with the gfortran tests. More details about those options and their purpose can be found in [`Fortran/gfortran/README.md`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/README.md). - - These tests are Free Software and are shared under the terms of the GNU General Public License (GPL). For more details, please see the accompanying [`LICENSE`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/LICENSE.txt) file. 
diff --git a/flang/include/flang/Lower/DirectivesCommon.h b/flang/include/flang/Lower/DirectivesCommon.h index 6ed3c1b..2d69067 100644 --- a/flang/include/flang/Lower/DirectivesCommon.h +++ b/flang/include/flang/Lower/DirectivesCommon.h @@ -39,7 +39,6 @@ #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/Value.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" #include <list> #include <type_traits> diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index f7910ad..c03d0a0 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -262,6 +262,7 @@ public: const Scope &FindScope(parser::CharBlock) const; Scope &FindScope(parser::CharBlock); void UpdateScopeIndex(Scope &, parser::CharBlock); + void DumpScopeIndex(llvm::raw_ostream &) const; bool IsInModuleFile(parser::CharBlock) const; diff --git a/flang/include/flang/Utils/OpenMP.h b/flang/include/flang/Utils/OpenMP.h index 01a94c9..bad0abb 100644 --- a/flang/include/flang/Utils/OpenMP.h +++ b/flang/include/flang/Utils/OpenMP.h @@ -29,8 +29,9 @@ mlir::omp::MapInfoOp createMapInfoOp(mlir::OpBuilder &builder, mlir::Location loc, mlir::Value baseAddr, mlir::Value varPtrPtr, llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds, llvm::ArrayRef<mlir::Value> members, mlir::ArrayAttr membersIndex, - uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType, - mlir::Type retTy, bool partialMap = false, + mlir::omp::ClauseMapFlags mapType, + mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, + bool partialMap = false, mlir::FlatSymbolRefAttr mapperId = mlir::FlatSymbolRefAttr()); /// For an mlir value that does not have storage, allocate temporary storage diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index b3e8b69..af4f420 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -718,6 +718,84 @@ static void genDataOperandOperations( } } +template <typename GlobalCtorOrDtorOp, typename EntryOp, typename DeclareOp, + typename ExitOp> +static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder, + fir::FirOpBuilder &builder, + mlir::Location loc, fir::GlobalOp globalOp, + mlir::acc::DataClause clause, + const std::string &declareGlobalName, + bool implicit, std::stringstream &asFortran) { + GlobalCtorOrDtorOp declareGlobalOp = + GlobalCtorOrDtorOp::create(modBuilder, loc, declareGlobalName); + builder.createBlock(&declareGlobalOp.getRegion(), + declareGlobalOp.getRegion().end(), {}, {}); + builder.setInsertionPointToEnd(&declareGlobalOp.getRegion().back()); + + fir::AddrOfOp addrOp = fir::AddrOfOp::create( + builder, loc, fir::ReferenceType::get(globalOp.getType()), + globalOp.getSymbol()); + addDeclareAttr(builder, addrOp, clause); + + llvm::SmallVector<mlir::Value> bounds; + EntryOp entryOp = createDataEntryOp<EntryOp>( + builder, loc, addrOp.getResTy(), asFortran, bounds, + /*structured=*/false, implicit, clause, addrOp.getResTy().getType(), + /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); + if constexpr (std::is_same_v<DeclareOp, mlir::acc::DeclareEnterOp>) + DeclareOp::create(builder, loc, + mlir::acc::DeclareTokenType::get(entryOp.getContext()), + mlir::ValueRange(entryOp.getAccVar())); + else + DeclareOp::create(builder, loc, mlir::Value{}, + mlir::ValueRange(entryOp.getAccVar())); + if constexpr (std::is_same_v<GlobalCtorOrDtorOp, + mlir::acc::GlobalDestructorOp>) { + if constexpr (std::is_same_v<ExitOp, 
mlir::acc::DeclareLinkOp>) { + // No destructor emission for declare link in this path to avoid + // complex var/varType/varPtrPtr signatures. The ctor registers the link. + } else if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> || + std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>) { + ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(), + entryOp.getVar(), entryOp.getVarType(), + entryOp.getBounds(), entryOp.getAsyncOperands(), + entryOp.getAsyncOperandsDeviceTypeAttr(), + entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(), + /*structured=*/false, /*implicit=*/false, + builder.getStringAttr(*entryOp.getName())); + } else { + ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(), + entryOp.getBounds(), entryOp.getAsyncOperands(), + entryOp.getAsyncOperandsDeviceTypeAttr(), + entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(), + /*structured=*/false, /*implicit=*/false, + builder.getStringAttr(*entryOp.getName())); + } + } + mlir::acc::TerminatorOp::create(builder, loc); + modBuilder.setInsertionPointAfter(declareGlobalOp); +} + +template <typename EntryOp, typename ExitOp> +static void +emitCtorDtorPair(mlir::OpBuilder &modBuilder, fir::FirOpBuilder &builder, + mlir::Location operandLocation, fir::GlobalOp globalOp, + mlir::acc::DataClause clause, std::stringstream &asFortran, + const std::string &ctorName) { + createDeclareGlobalOp<mlir::acc::GlobalConstructorOp, EntryOp, + mlir::acc::DeclareEnterOp, ExitOp>( + modBuilder, builder, operandLocation, globalOp, clause, ctorName, + /*implicit=*/false, asFortran); + + std::stringstream dtorName; + dtorName << globalOp.getSymName().str() << "_acc_dtor"; + createDeclareGlobalOp<mlir::acc::GlobalDestructorOp, + mlir::acc::GetDevicePtrOp, mlir::acc::DeclareExitOp, + ExitOp>(modBuilder, builder, operandLocation, globalOp, + clause, dtorName.str(), + /*implicit=*/false, asFortran); +} + template <typename EntryOp, typename ExitOp> static void genDeclareDataOperandOperations( const Fortran::parser::AccObjectList &objectList, @@ -733,6 +811,37 @@ static void genDeclareDataOperandOperations( std::stringstream asFortran; mlir::Location operandLocation = genOperandLocation(converter, accObject); Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); + // Handle COMMON/global symbols via module-level ctor/dtor path. + if (symbol.detailsIf<Fortran::semantics::CommonBlockDetails>() || + Fortran::semantics::FindCommonBlockContaining(symbol)) { + emitCommonGlobal( + converter, builder, accObject, dataClause, + [&](mlir::OpBuilder &modBuilder, mlir::Location loc, + fir::GlobalOp globalOp, mlir::acc::DataClause clause, + std::stringstream &asFortranStr, const std::string &ctorName) { + if constexpr (std::is_same_v<EntryOp, mlir::acc::DeclareLinkOp>) { + createDeclareGlobalOp< + mlir::acc::GlobalConstructorOp, mlir::acc::DeclareLinkOp, + mlir::acc::DeclareEnterOp, mlir::acc::DeclareLinkOp>( + modBuilder, builder, loc, globalOp, clause, ctorName, + /*implicit=*/false, asFortranStr); + } else if constexpr (std::is_same_v<EntryOp, mlir::acc::CreateOp> || + std::is_same_v<EntryOp, mlir::acc::CopyinOp> || + std::is_same_v< + EntryOp, + mlir::acc::DeclareDeviceResidentOp> || + std::is_same_v<ExitOp, mlir::acc::CopyoutOp>) { + emitCtorDtorPair<EntryOp, ExitOp>(modBuilder, builder, loc, + globalOp, clause, asFortranStr, + ctorName); + } else { + // No module-level ctor/dtor for this clause (e.g., deviceptr, + // present). Handled via structured declare region only. 
+ return; + } + }); + continue; + } Fortran::semantics::MaybeExpr designator = Fortran::common::visit( [&](auto &&s) { return ea.Analyze(s); }, accObject.u); fir::factory::AddrAndBoundsInfo info = @@ -4098,49 +4207,6 @@ static void genACC(Fortran::lower::AbstractConverter &converter, waitOp.setAsyncAttr(firOpBuilder.getUnitAttr()); } -template <typename GlobalOp, typename EntryOp, typename DeclareOp, - typename ExitOp> -static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder, - fir::FirOpBuilder &builder, - mlir::Location loc, fir::GlobalOp globalOp, - mlir::acc::DataClause clause, - const std::string &declareGlobalName, - bool implicit, std::stringstream &asFortran) { - GlobalOp declareGlobalOp = - GlobalOp::create(modBuilder, loc, declareGlobalName); - builder.createBlock(&declareGlobalOp.getRegion(), - declareGlobalOp.getRegion().end(), {}, {}); - builder.setInsertionPointToEnd(&declareGlobalOp.getRegion().back()); - - fir::AddrOfOp addrOp = fir::AddrOfOp::create( - builder, loc, fir::ReferenceType::get(globalOp.getType()), - globalOp.getSymbol()); - addDeclareAttr(builder, addrOp, clause); - - llvm::SmallVector<mlir::Value> bounds; - EntryOp entryOp = createDataEntryOp<EntryOp>( - builder, loc, addrOp.getResTy(), asFortran, bounds, - /*structured=*/false, implicit, clause, addrOp.getResTy().getType(), - /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); - if constexpr (std::is_same_v<DeclareOp, mlir::acc::DeclareEnterOp>) - DeclareOp::create(builder, loc, - mlir::acc::DeclareTokenType::get(entryOp.getContext()), - mlir::ValueRange(entryOp.getAccVar())); - else - DeclareOp::create(builder, loc, mlir::Value{}, - mlir::ValueRange(entryOp.getAccVar())); - if constexpr (std::is_same_v<GlobalOp, mlir::acc::GlobalDestructorOp>) { - ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(), - entryOp.getBounds(), entryOp.getAsyncOperands(), - entryOp.getAsyncOperandsDeviceTypeAttr(), - entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(), - /*structured=*/false, /*implicit=*/false, - builder.getStringAttr(*entryOp.getName())); - } - mlir::acc::TerminatorOp::create(builder, loc); - modBuilder.setInsertionPointAfter(declareGlobalOp); -} - template <typename EntryOp> static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder, fir::FirOpBuilder &builder, @@ -4317,6 +4383,66 @@ genGlobalCtorsWithModifier(Fortran::lower::AbstractConverter &converter, dataClause); } +static fir::GlobalOp +lookupGlobalBySymbolOrEquivalence(Fortran::lower::AbstractConverter &converter, + fir::FirOpBuilder &builder, + const Fortran::semantics::Symbol &sym) { + const Fortran::semantics::Symbol *commonBlock = + Fortran::semantics::FindCommonBlockContaining(sym); + std::string globalName = commonBlock ? 
converter.mangleName(*commonBlock) + : converter.mangleName(sym); + if (fir::GlobalOp g = builder.getNamedGlobal(globalName)) { + return g; + } + // Not found: if not a COMMON member, try equivalence members + if (!commonBlock) { + if (const Fortran::semantics::EquivalenceSet *eqSet = + Fortran::semantics::FindEquivalenceSet(sym)) { + for (const Fortran::semantics::EquivalenceObject &eqObj : *eqSet) { + std::string eqName = converter.mangleName(eqObj.symbol); + if (fir::GlobalOp g = builder.getNamedGlobal(eqName)) + return g; + } + } + } + return {}; +} + +template <typename EmitterFn> +static void emitCommonGlobal(Fortran::lower::AbstractConverter &converter, + fir::FirOpBuilder &builder, + const Fortran::parser::AccObject &obj, + mlir::acc::DataClause clause, + EmitterFn &&emitCtorDtor) { + Fortran::semantics::Symbol &sym = getSymbolFromAccObject(obj); + if (!(sym.detailsIf<Fortran::semantics::CommonBlockDetails>() || + Fortran::semantics::FindCommonBlockContaining(sym))) + return; + + fir::GlobalOp globalOp = + lookupGlobalBySymbolOrEquivalence(converter, builder, sym); + if (!globalOp) + llvm::report_fatal_error("could not retrieve global symbol"); + + std::stringstream ctorName; + ctorName << globalOp.getSymName().str() << "_acc_ctor"; + if (builder.getModule().lookupSymbol<mlir::acc::GlobalConstructorOp>( + ctorName.str())) + return; + + mlir::Location operandLocation = genOperandLocation(converter, obj); + addDeclareAttr(builder, globalOp.getOperation(), clause); + mlir::OpBuilder modBuilder(builder.getModule().getBodyRegion()); + modBuilder.setInsertionPointAfter(globalOp); + std::stringstream asFortran; + asFortran << sym.name().ToString(); + + auto savedIP = builder.saveInsertionPoint(); + emitCtorDtor(modBuilder, operandLocation, globalOp, clause, asFortran, + ctorName.str()); + builder.restoreInsertionPoint(savedIP); +} + static void genDeclareInFunction(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semanticsContext, @@ -4342,11 +4468,9 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter, dataClauseOperands.end()); } else if (const auto *createClause = std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) { - const Fortran::parser::AccObjectListWithModifier &listWithModifier = - createClause->v; - const auto &accObjectList = - std::get<Fortran::parser::AccObjectList>(listWithModifier.t); auto crtDataStart = dataClauseOperands.size(); + const auto &accObjectList = + std::get<Fortran::parser::AccObjectList>(createClause->v.t); genDeclareDataOperandOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>( accObjectList, converter, semanticsContext, stmtCtx, dataClauseOperands, mlir::acc::DataClause::acc_create, @@ -4378,11 +4502,9 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter, } else if (const auto *copyoutClause = std::get_if<Fortran::parser::AccClause::Copyout>( &clause.u)) { - const Fortran::parser::AccObjectListWithModifier &listWithModifier = - copyoutClause->v; - const auto &accObjectList = - std::get<Fortran::parser::AccObjectList>(listWithModifier.t); auto crtDataStart = dataClauseOperands.size(); + const auto &accObjectList = + std::get<Fortran::parser::AccObjectList>(copyoutClause->v.t); genDeclareDataOperandOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>( accObjectList, converter, semanticsContext, stmtCtx, @@ -4423,6 +4545,11 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter, } } + // If no structured operands were generated (all objects were COMMON), + // do not 
create a declare region. + if (dataClauseOperands.empty()) + return; + mlir::func::FuncOp funcOp = builder.getFunction(); auto ops = funcOp.getOps<mlir::acc::DeclareEnterOp>(); mlir::Value declareToken; diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 85398be..1c163e6 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1080,9 +1080,8 @@ bool ClauseProcessor::processHasDeviceAddr( [&](const omp::clause::HasDeviceAddr &clause, const parser::CharBlock &source) { mlir::Location location = converter.genLocation(source); - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::ClauseMapFlags mapTypeBits = + mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::implicit; omp::ObjectList baseObjects; llvm::transform(clause.v, std::back_inserter(baseObjects), [&](const omp::Object &object) { @@ -1217,8 +1216,7 @@ bool ClauseProcessor::processLink( void ClauseProcessor::processMapObjects( lower::StatementContext &stmtCtx, mlir::Location clauseLocation, - const omp::ObjectList &objects, - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits, + const omp::ObjectList &objects, mlir::omp::ClauseMapFlags mapTypeBits, std::map<Object, OmpMapParentAndMemberData> &parentMemberIndices, llvm::SmallVectorImpl<mlir::Value> &mapVars, llvm::SmallVectorImpl<const semantics::Symbol *> &mapSyms, @@ -1310,10 +1308,7 @@ void ClauseProcessor::processMapObjects( mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp( firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds, - /*members=*/{}, /*membersIndex=*/mlir::ArrayAttr{}, - static_cast< - std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - mapTypeBits), + /*members=*/{}, /*membersIndex=*/mlir::ArrayAttr{}, mapTypeBits, mlir::omp::VariableCaptureKind::ByRef, baseOp.getType(), /*partialMap=*/false, mapperId); @@ -1347,8 +1342,7 @@ bool ClauseProcessor::processMap( objects] = clause.t; if (attachMod) TODO(currentLocation, "ATTACH modifier is not implemented yet"); - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE; + mlir::omp::ClauseMapFlags mapTypeBits = mlir::omp::ClauseMapFlags::none; std::string mapperIdName = "__implicit_mapper"; // If the map type is specified, then process it else set the appropriate // default value @@ -1364,36 +1358,32 @@ bool ClauseProcessor::processMap( switch (type) { case Map::MapType::To: - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapTypeBits |= mlir::omp::ClauseMapFlags::to; break; case Map::MapType::From: - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + mapTypeBits |= mlir::omp::ClauseMapFlags::from; break; case Map::MapType::Tofrom: - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + mapTypeBits |= + mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::from; break; case Map::MapType::Storage: - // alloc and release is the default map_type for the Target Data - // Ops, i.e. if no bits for map_type is supplied then alloc/release - // (aka storage in 6.0+) is implicitly assumed based on the target - // directive. Default value for Target Data and Enter Data is alloc - // and for Exit Data it is release. 
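+        // Note: "storage" (OpenMP 6.0) subsumes what earlier versions spelled
+        // "alloc" (for target data / enter data) and "release" (for exit
+        // data); the flag is now set explicitly instead of being implied.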
+ mapTypeBits |= mlir::omp::ClauseMapFlags::storage; break; } if (typeMods) { // TODO: Still requires "self" modifier, an OpenMP 6.0+ feature if (llvm::is_contained(*typeMods, Map::MapTypeModifier::Always)) - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; + mapTypeBits |= mlir::omp::ClauseMapFlags::always; if (llvm::is_contained(*typeMods, Map::MapTypeModifier::Present)) - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; + mapTypeBits |= mlir::omp::ClauseMapFlags::present; if (llvm::is_contained(*typeMods, Map::MapTypeModifier::Close)) - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE; + mapTypeBits |= mlir::omp::ClauseMapFlags::close; if (llvm::is_contained(*typeMods, Map::MapTypeModifier::Delete)) - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE; + mapTypeBits |= mlir::omp::ClauseMapFlags::del; if (llvm::is_contained(*typeMods, Map::MapTypeModifier::OmpxHold)) - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; + mapTypeBits |= mlir::omp::ClauseMapFlags::ompx_hold; } if (iterator) { @@ -1437,12 +1427,12 @@ bool ClauseProcessor::processMotionClauses(lower::StatementContext &stmtCtx, TODO(clauseLocation, "Iterator modifier is not supported yet"); } - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = + mlir::omp::ClauseMapFlags mapTypeBits = std::is_same_v<llvm::remove_cvref_t<decltype(clause)>, omp::clause::To> - ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO - : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + ? mlir::omp::ClauseMapFlags::to + : mlir::omp::ClauseMapFlags::from; if (expectation && *expectation == omp::clause::To::Expectation::Present) - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; + mapTypeBits |= mlir::omp::ClauseMapFlags::present; processMapObjects(stmtCtx, clauseLocation, objects, mapTypeBits, parentMemberIndices, result.mapVars, mapSymbols); }; @@ -1568,8 +1558,8 @@ bool ClauseProcessor::processUseDeviceAddr( [&](const omp::clause::UseDeviceAddr &clause, const parser::CharBlock &source) { mlir::Location location = converter.genLocation(source); - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + mlir::omp::ClauseMapFlags mapTypeBits = + mlir::omp::ClauseMapFlags::return_param; processMapObjects(stmtCtx, location, clause.v, mapTypeBits, parentMemberIndices, result.useDeviceAddrVars, useDeviceSyms); @@ -1589,8 +1579,8 @@ bool ClauseProcessor::processUseDevicePtr( [&](const omp::clause::UseDevicePtr &clause, const parser::CharBlock &source) { mlir::Location location = converter.genLocation(source); - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + mlir::omp::ClauseMapFlags mapTypeBits = + mlir::omp::ClauseMapFlags::return_param; processMapObjects(stmtCtx, location, clause.v, mapTypeBits, parentMemberIndices, result.useDevicePtrVars, useDeviceSyms); diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 9e352fa..6452e39 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -194,8 +194,7 @@ private: void processMapObjects( lower::StatementContext &stmtCtx, mlir::Location clauseLocation, - const omp::ObjectList &objects, - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits, + const omp::ObjectList &objects, mlir::omp::ClauseMapFlags mapTypeBits, std::map<Object, OmpMapParentAndMemberData> &parentMemberIndices, 
llvm::SmallVectorImpl<mlir::Value> &mapVars, llvm::SmallVectorImpl<const semantics::Symbol *> &mapSyms, diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 2a4ebf1..d39f9dd 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -16,8 +16,6 @@ #include "flang/Semantics/openmp-modifiers.h" #include "flang/Semantics/symbol.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" - #include <list> #include <optional> #include <tuple> diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 9495ea6..a49961c 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -45,7 +45,6 @@ #include "mlir/Support/StateStack.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; @@ -945,8 +944,7 @@ getDefaultmapIfPresent(const DefaultMapsTy &defaultMaps, mlir::Type varType) { return DefMap::ImplicitBehavior::Default; } -static std::pair<llvm::omp::OpenMPOffloadMappingFlags, - mlir::omp::VariableCaptureKind> +static std::pair<mlir::omp::ClauseMapFlags, mlir::omp::VariableCaptureKind> getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder, lower::AbstractConverter &converter, const DefaultMapsTy &defaultMaps, mlir::Type varType, @@ -967,8 +965,7 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder, return size <= ptrSize && align <= ptrAlign; }; - llvm::omp::OpenMPOffloadMappingFlags mapFlag = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::ClauseMapFlags mapFlag = mlir::omp::ClauseMapFlags::implicit; auto implicitBehaviour = getDefaultmapIfPresent(defaultMaps, varType); if (implicitBehaviour == DefMap::ImplicitBehavior::Default) { @@ -986,8 +983,8 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder, mlir::omp::DeclareTargetCaptureClause::link && declareTargetOp.getDeclareTargetDeviceType() != mlir::omp::DeclareTargetDeviceType::nohost) { - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + mapFlag |= mlir::omp::ClauseMapFlags::to; + mapFlag |= mlir::omp::ClauseMapFlags::from; } } else if (fir::isa_trivial(varType) || fir::isa_char(varType)) { // Scalars behave as if they were "firstprivate". 
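NOTE (reviewer sketch, not part of the patch): the change repeated across these lowering files swaps the raw 64-bit encoding of llvm::omp::OpenMPOffloadMappingFlags for the OpenMP dialect's typed bitmask enum. Assuming the tablegen-generated mlir::omp::ClauseMapFlags and ClauseMapFlagsAttr, the before/after at a typical map-attribute construction site is:

    // Before: the enum was squeezed through an unsigned 64-bit IntegerAttr.
    uint64_t raw = static_cast<
        std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
        llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
        llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
    mlir::IntegerAttr oldAttr = builder.getIntegerAttr(
        builder.getIntegerType(64, /*isSigned=*/false), raw);
    // After: the flags travel as a first-class attribute, no casts needed.
    mlir::omp::ClauseMapFlags flags =
        mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::implicit;
    auto newAttr = builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(flags);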
@@ -996,18 +993,18 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder, if (isLiteralType(varType)) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } else { - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= mlir::omp::ClauseMapFlags::to; } } else if (!fir::isa_builtin_cptr_type(varType)) { - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + mapFlag |= mlir::omp::ClauseMapFlags::to; + mapFlag |= mlir::omp::ClauseMapFlags::from; } return std::make_pair(mapFlag, captureKind); } switch (implicitBehaviour) { case DefMap::ImplicitBehavior::Alloc: - return std::make_pair(llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE, + return std::make_pair(mlir::omp::ClauseMapFlags::storage, mlir::omp::VariableCaptureKind::ByRef); break; case DefMap::ImplicitBehavior::Firstprivate: @@ -1016,26 +1013,22 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder, "behaviour"); break; case DefMap::ImplicitBehavior::From: - return std::make_pair(mapFlag |= - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM, + return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::from, mlir::omp::VariableCaptureKind::ByRef); break; case DefMap::ImplicitBehavior::Present: - return std::make_pair(mapFlag |= - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT, + return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::present, mlir::omp::VariableCaptureKind::ByRef); break; case DefMap::ImplicitBehavior::To: - return std::make_pair(mapFlag |= - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO, + return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::to, (fir::isa_trivial(varType) || fir::isa_char(varType)) ? mlir::omp::VariableCaptureKind::ByCopy : mlir::omp::VariableCaptureKind::ByRef); break; case DefMap::ImplicitBehavior::Tofrom: - return std::make_pair(mapFlag |= - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM | - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO, + return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::from | + mlir::omp::ClauseMapFlags::to, mlir::omp::VariableCaptureKind::ByRef); break; case DefMap::ImplicitBehavior::Default: @@ -1044,9 +1037,8 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder, break; } - return std::make_pair(mapFlag |= - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM | - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO, + return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::from | + mlir::omp::ClauseMapFlags::to, mlir::omp::VariableCaptureKind::ByRef); } @@ -2612,18 +2604,14 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, if (auto refType = mlir::dyn_cast<fir::ReferenceType>(baseOp.getType())) eleType = refType.getElementType(); - std::pair<llvm::omp::OpenMPOffloadMappingFlags, - mlir::omp::VariableCaptureKind> + std::pair<mlir::omp::ClauseMapFlags, mlir::omp::VariableCaptureKind> mapFlagAndKind = getImplicitMapTypeAndKind( firOpBuilder, converter, defaultMaps, eleType, loc, sym); mlir::Value mapOp = createMapInfoOp( firOpBuilder, converter.getCurrentLocation(), baseOp, /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, /*members=*/{}, - /*membersIndex=*/mlir::ArrayAttr{}, - static_cast< - std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - std::get<0>(mapFlagAndKind)), + /*membersIndex=*/mlir::ArrayAttr{}, std::get<0>(mapFlagAndKind), std::get<1>(mapFlagAndKind), baseOp.getType(), /*partialMap=*/false, mapperId); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp 
b/flang/lib/Lower/OpenMP/Utils.cpp index 37b926e..6487f59 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -273,7 +273,7 @@ mlir::Value createParentSymAndGenIntermediateMaps( semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, omp::ObjectList &objectList, llvm::SmallVectorImpl<int64_t> &indices, OmpMapParentAndMemberData &parentMemberIndices, llvm::StringRef asFortran, - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits) { + mlir::omp::ClauseMapFlags mapTypeBits) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); /// Checks if an omp::Object is an array expression with a subscript, e.g. @@ -414,11 +414,10 @@ mlir::Value createParentSymAndGenIntermediateMaps( // be safer to just pass OMP_MAP_NONE as the map type, but we may still // need some of the other map types the mapped member utilises, so for // now it's good to keep an eye on this. - llvm::omp::OpenMPOffloadMappingFlags interimMapType = mapTypeBits; - interimMapType &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; - interimMapType &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; - interimMapType &= - ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + mlir::omp::ClauseMapFlags interimMapType = mapTypeBits; + interimMapType &= ~mlir::omp::ClauseMapFlags::to; + interimMapType &= ~mlir::omp::ClauseMapFlags::from; + interimMapType &= ~mlir::omp::ClauseMapFlags::return_param; // Create a map for the intermediate member and insert it and it's // indices into the parentMemberIndices list to track it. @@ -427,10 +426,7 @@ mlir::Value createParentSymAndGenIntermediateMaps( /*varPtrPtr=*/mlir::Value{}, asFortran, /*bounds=*/interimBounds, /*members=*/{}, - /*membersIndex=*/mlir::ArrayAttr{}, - static_cast< - std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - interimMapType), + /*membersIndex=*/mlir::ArrayAttr{}, interimMapType, mlir::omp::VariableCaptureKind::ByRef, curValue.getType()); parentMemberIndices.memberPlacementIndices.push_back(interimIndices); @@ -563,7 +559,8 @@ void insertChildMapInfoIntoParent( // it allows this to work with enter and exit without causing MLIR // verification issues. The more appropriate thing may be to take // the "main" map type clause from the directive being used. 
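NOTE (reviewer sketch): with the map type stored as a ClauseMapFlagsAttr, the generated MapInfoOp accessor hands the enum back directly, which is why the hunk below can drop the uint64_t round-trip. Querying a bit keeps the usual bitmask-enum shape (mapInfoOp here is a hypothetical local):

    mlir::omp::ClauseMapFlags flags = mapInfoOp.getMapType();
    bool isImplicit =
        (flags & mlir::omp::ClauseMapFlags::implicit) ==
        mlir::omp::ClauseMapFlags::implicit;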
- uint64_t mapType = indices.second.memberMap[0].getMapType(); + mlir::omp::ClauseMapFlags mapType = + indices.second.memberMap[0].getMapType(); llvm::SmallVector<mlir::Value> members; members.reserve(indices.second.memberMap.size()); diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 69499f9..ef1f37a 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -134,7 +134,7 @@ mlir::Value createParentSymAndGenIntermediateMaps( semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, omp::ObjectList &objectList, llvm::SmallVectorImpl<int64_t> &indices, OmpMapParentAndMemberData &parentMemberIndices, llvm::StringRef asFortran, - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits); + mlir::omp::ClauseMapFlags mapTypeBits); omp::ObjectList gatherObjectsOf(omp::Object derivedTypeMember, semantics::SemanticsContext &semaCtx); diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp index 8b99913..817434f 100644 --- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp +++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp @@ -20,8 +20,6 @@ #include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" - namespace flangomp { #define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS #include "flang/Optimizer/OpenMP/Passes.h.inc" @@ -120,12 +118,9 @@ class AutomapToTargetDataPass builder, memOp.getLoc(), memOp.getMemref().getType(), memOp.getMemref(), TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())), - builder.getIntegerAttr( - builder.getIntegerType(64, false), - static_cast<unsigned>( - isa<fir::StoreOp>(memOp) - ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO - : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE)), + builder.getAttr<omp::ClauseMapFlagsAttr>( + isa<fir::StoreOp>(memOp) ? 
omp::ClauseMapFlags::to + : omp::ClauseMapFlags::del), builder.getAttr<omp::VariableCaptureKindAttr>( omp::VariableCaptureKind::ByCopy), /*var_ptr_ptr=*/mlir::Value{}, diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index 03ff163..65a23be 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -22,7 +22,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" namespace flangomp { #define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS @@ -568,16 +567,15 @@ private: if (auto refType = mlir::dyn_cast<fir::ReferenceType>(liveInType)) eleType = refType.getElementType(); - llvm::omp::OpenMPOffloadMappingFlags mapFlag = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::ClauseMapFlags mapFlag = mlir::omp::ClauseMapFlags::implicit; mlir::omp::VariableCaptureKind captureKind = mlir::omp::VariableCaptureKind::ByRef; if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } else if (!fir::isa_builtin_cptr_type(eleType)) { - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + mapFlag |= mlir::omp::ClauseMapFlags::to; + mapFlag |= mlir::omp::ClauseMapFlags::from; } llvm::SmallVector<mlir::Value> boundsOps; @@ -587,11 +585,8 @@ private: builder, liveIn.getLoc(), rawAddr, /*varPtrPtr=*/{}, name.str(), boundsOps, /*members=*/{}, - /*membersIndex=*/mlir::ArrayAttr{}, - static_cast< - std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - mapFlag), - captureKind, rawAddr.getType()); + /*membersIndex=*/mlir::ArrayAttr{}, mapFlag, captureKind, + rawAddr.getType()); } mlir::omp::TargetOp diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp index 9278e17..8a9b383 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp @@ -719,10 +719,9 @@ FailureOr<omp::TargetOp> splitTargetData(omp::TargetOp targetOp, SmallVector<Value> outerMapInfos; // Create new mapinfo ops for the inner target region for (auto mapInfo : mapInfos) { - auto originalMapType = - (llvm::omp::OpenMPOffloadMappingFlags)(mapInfo.getMapType()); + mlir::omp::ClauseMapFlags originalMapType = mapInfo.getMapType(); auto originalCaptureType = mapInfo.getMapCaptureType(); - llvm::omp::OpenMPOffloadMappingFlags newMapType; + mlir::omp::ClauseMapFlags newMapType; mlir::omp::VariableCaptureKind newCaptureType; // For bycopy, we keep the same map type and capture type // For byref, we change the map type to none and keep the capture type @@ -730,7 +729,7 @@ FailureOr<omp::TargetOp> splitTargetData(omp::TargetOp targetOp, newMapType = originalMapType; newCaptureType = originalCaptureType; } else if (originalCaptureType == mlir::omp::VariableCaptureKind::ByRef) { - newMapType = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE; + newMapType = mlir::omp::ClauseMapFlags::storage; newCaptureType = originalCaptureType; outerMapInfos.push_back(mapInfo); } else { @@ -738,11 +737,8 @@ FailureOr<omp::TargetOp> splitTargetData(omp::TargetOp targetOp, return failure(); } auto innerMapInfo = cast<omp::MapInfoOp>(rewriter.clone(*mapInfo)); - innerMapInfo.setMapTypeAttr(rewriter.getIntegerAttr( - rewriter.getIntegerType(64, 
false), - static_cast< - std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - newMapType))); + innerMapInfo.setMapTypeAttr( + rewriter.getAttr<omp::ClauseMapFlagsAttr>(newMapType)); innerMapInfo.setMapCaptureType(newCaptureType); innerMapInfos.push_back(innerMapInfo.getResult()); } @@ -834,11 +830,11 @@ static TempOmpVar allocateTempOmpVar(Location loc, Type ty, alloc = rewriter.create<fir::AllocaOp>(loc, allocType); } // Lambda to create mapinfo ops - auto getMapInfo = [&](uint64_t mappingFlags, const char *name) { + auto getMapInfo = [&](mlir::omp::ClauseMapFlags mappingFlags, + const char *name) { return rewriter.create<omp::MapInfoOp>( loc, alloc.getType(), alloc, TypeAttr::get(allocType), - rewriter.getIntegerAttr(rewriter.getIntegerType(64, /*isSigned=*/false), - mappingFlags), + rewriter.getAttr<omp::ClauseMapFlagsAttr>(mappingFlags), rewriter.getAttr<omp::VariableCaptureKindAttr>( omp::VariableCaptureKind::ByRef), /*varPtrPtr=*/Value{}, @@ -849,14 +845,10 @@ static TempOmpVar allocateTempOmpVar(Location loc, Type ty, /*name=*/rewriter.getStringAttr(name), rewriter.getBoolAttr(false)); }; // Create mapinfo ops. - uint64_t mapFrom = - static_cast<std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM); - uint64_t mapTo = - static_cast<std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO); - auto mapInfoFrom = getMapInfo(mapFrom, "__flang_workdistribute_from"); - auto mapInfoTo = getMapInfo(mapTo, "__flang_workdistribute_to"); + auto mapInfoFrom = getMapInfo(mlir::omp::ClauseMapFlags::from, + "__flang_workdistribute_from"); + auto mapInfoTo = + getMapInfo(mlir::omp::ClauseMapFlags::to, "__flang_workdistribute_to"); return TempOmpVar{mapInfoFrom, mapInfoTo}; } diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index 2bbd803..566e88b 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -43,7 +43,6 @@ #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringSet.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cstddef> @@ -350,7 +349,7 @@ class MapInfoFinalizationPass /// the descriptor map onto the base address map. mlir::omp::MapInfoOp genBaseAddrMap(mlir::Value descriptor, mlir::OperandRange bounds, - int64_t mapType, + mlir::omp::ClauseMapFlags mapType, fir::FirOpBuilder &builder) { mlir::Location loc = descriptor.getLoc(); mlir::Value baseAddrAddr = fir::BoxOffsetOp::create( @@ -368,7 +367,7 @@ class MapInfoFinalizationPass return mlir::omp::MapInfoOp::create( builder, loc, baseAddrAddr.getType(), descriptor, mlir::TypeAttr::get(underlyingVarType), - builder.getIntegerAttr(builder.getIntegerType(64, false), mapType), + builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(mapType), builder.getAttr<mlir::omp::VariableCaptureKindAttr>( mlir::omp::VariableCaptureKind::ByRef), baseAddrAddr, /*members=*/mlir::SmallVector<mlir::Value>{}, @@ -428,22 +427,22 @@ class MapInfoFinalizationPass /// allowing `to` mappings, and `target update` not allowing both `to` and /// `from` simultaneously. We currently try to maintain the `implicit` flag /// where necessary, although it does not seem strictly required. 
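NOTE (worked example, not part of the patch): for a descriptor reaching a target construct as `tofrom` with the implicit bit set, the rewritten helper below computes:

    using mapFlags = mlir::omp::ClauseMapFlags;
    mapFlags in = mapFlags::to | mapFlags::from | mapFlags::implicit;
    mapFlags desc =
        mapFlags::to | (in & (mapFlags::implicit | mapFlags::always));
    // desc == to | implicit: `from` is dropped so the descriptor map stays
    // valid on constructs that only allow `to`; target exit data and target
    // update return the incoming flags unchanged, and under
    // unified_shared_memory the helper also ORs in mapFlags::close.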
- unsigned long getDescriptorMapType(unsigned long mapTypeFlag, - mlir::Operation *target) { - using mapFlags = llvm::omp::OpenMPOffloadMappingFlags; + mlir::omp::ClauseMapFlags + getDescriptorMapType(mlir::omp::ClauseMapFlags mapTypeFlag, + mlir::Operation *target) { + using mapFlags = mlir::omp::ClauseMapFlags; if (llvm::isa_and_nonnull<mlir::omp::TargetExitDataOp, mlir::omp::TargetUpdateOp>(target)) return mapTypeFlag; - mapFlags flags = mapFlags::OMP_MAP_TO | - (mapFlags(mapTypeFlag) & - (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS)); + mapFlags flags = + mapFlags::to | (mapTypeFlag & (mapFlags::implicit | mapFlags::always)); // For unified_shared_memory, we additionally add `CLOSE` on the descriptor // to ensure device-local placement where required by tests relying on USM + // close semantics. if (moduleRequiresUSM(target->getParentOfType<mlir::ModuleOp>())) - flags |= mapFlags::OMP_MAP_CLOSE; - return llvm::to_underlying(flags); + flags |= mapFlags::close; + return flags; } /// Check if the mapOp is present in the HasDeviceAddr clause on @@ -493,11 +492,6 @@ class MapInfoFinalizationPass mlir::Value boxAddr = fir::BoxOffsetOp::create( builder, loc, op.getVarPtr(), fir::BoxFieldAttr::base_addr); - uint64_t mapTypeToImplicit = static_cast< - std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); - mlir::ArrayAttr newMembersAttr; llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}}; newMembersAttr = builder.create2DI64ArrayAttr(memberIdx); @@ -506,8 +500,9 @@ class MapInfoFinalizationPass mlir::omp::MapInfoOp memberMapInfoOp = mlir::omp::MapInfoOp::create( builder, op.getLoc(), varPtr.getType(), varPtr, mlir::TypeAttr::get(boxCharType.getEleTy()), - builder.getIntegerAttr(builder.getIntegerType(64, /*isSigned=*/false), - mapTypeToImplicit), + builder.getAttr<mlir::omp::ClauseMapFlagsAttr>( + mlir::omp::ClauseMapFlags::to | + mlir::omp::ClauseMapFlags::implicit), builder.getAttr<mlir::omp::VariableCaptureKindAttr>( mlir::omp::VariableCaptureKind::ByRef), /*varPtrPtr=*/boxAddr, @@ -568,12 +563,9 @@ class MapInfoFinalizationPass mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx); // Force CLOSE in USM paths so the pointer gets device-local placement // when required by tests relying on USM + close semantics. - uint64_t mapTypeVal = - op.getMapType() | - llvm::to_underlying( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); - mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr( - builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal); + mlir::omp::ClauseMapFlagsAttr mapTypeAttr = + builder.getAttr<mlir::omp::ClauseMapFlagsAttr>( + op.getMapType() | mlir::omp::ClauseMapFlags::close); mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create( builder, loc, coord.getType(), coord, @@ -683,17 +675,16 @@ class MapInfoFinalizationPass // one place in the code may differ from that address in another place. // The contents of the descriptor (the base address in particular) will // remain unchanged though. 
- uint64_t mapType = op.getMapType(); + mlir::omp::ClauseMapFlags mapType = op.getMapType(); if (isHasDeviceAddrFlag) { - mapType |= llvm::to_underlying( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS); + mapType |= mlir::omp::ClauseMapFlags::always; } mlir::omp::MapInfoOp newDescParentMapOp = mlir::omp::MapInfoOp::create( builder, op->getLoc(), op.getResult().getType(), descriptor, mlir::TypeAttr::get(fir::unwrapRefType(descriptor.getType())), - builder.getIntegerAttr(builder.getIntegerType(64, false), - getDescriptorMapType(mapType, target)), + builder.getAttr<mlir::omp::ClauseMapFlagsAttr>( + getDescriptorMapType(mapType, target)), op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{}, newMembers, newMembersAttr, /*bounds=*/mlir::SmallVector<mlir::Value>{}, /*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(), @@ -896,11 +887,9 @@ class MapInfoFinalizationPass builder.create<mlir::omp::MapInfoOp>( op->getLoc(), op.getResult().getType(), op.getVarPtr(), op.getVarTypeAttr(), - builder.getIntegerAttr( - builder.getIntegerType(64, false), - llvm::to_underlying( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS)), + builder.getAttr<mlir::omp::ClauseMapFlagsAttr>( + mlir::omp::ClauseMapFlags::to | + mlir::omp::ClauseMapFlags::always), op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{}, mlir::SmallVector<mlir::Value>{}, mlir::ArrayAttr{}, /*bounds=*/mlir::SmallVector<mlir::Value>{}, @@ -1240,9 +1229,8 @@ class MapInfoFinalizationPass // we need to change this check for early return OR live with // over-mapping. bool hasImplicitMap = - (llvm::omp::OpenMPOffloadMappingFlags(op.getMapType()) & - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT) == - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + (op.getMapType() & mlir::omp::ClauseMapFlags::implicit) == + mlir::omp::ClauseMapFlags::implicit; if (hasImplicitMap) return; diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp index 3032857..0972861 100644 --- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp +++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp @@ -35,7 +35,6 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Support/Debug.h" #include <type_traits> @@ -70,9 +69,6 @@ class MapsForPrivatizedSymbolsPass return size <= ptrSize && align <= ptrAlign; }; - uint64_t mapTypeTo = static_cast< - std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO); Operation *definingOp = var.getDefiningOp(); Value varPtr = var; @@ -122,8 +118,7 @@ class MapsForPrivatizedSymbolsPass builder, loc, varPtr.getType(), varPtr, TypeAttr::get(llvm::cast<omp::PointerLikeType>(varPtr.getType()) .getElementType()), - builder.getIntegerAttr(builder.getIntegerType(64, /*isSigned=*/false), - mapTypeTo), + builder.getAttr<omp::ClauseMapFlagsAttr>(omp::ClauseMapFlags::to), builder.getAttr<omp::VariableCaptureKindAttr>(captureKind), /*varPtrPtr=*/Value{}, /*members=*/SmallVector<Value>{}, diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 759e3a65d..8d00272 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -454,6 +454,8 @@ struct DeclareOpConversion : public 
mlir::OpRewritePattern<fir::DeclareOp> { mlir::LogicalResult matchAndRewrite(fir::DeclareOp op, mlir::PatternRewriter &rewriter) const override { + if (op.getResult().getUsers().empty()) + return success(); if (auto addrOfOp = op.getMemref().getDefiningOp<fir::AddrOfOp>()) { if (auto global = symTab.lookup<fir::GlobalOp>( addrOfOp.getSymbol().getRootReference().getValue())) { @@ -963,6 +965,8 @@ public: } target.addDynamicallyLegalOp<fir::DeclareOp>([&](fir::DeclareOp op) { + if (op.getResult().getUsers().empty()) + return true; if (inDeviceContext(op)) return true; if (auto addrOfOp = op.getMemref().getDefiningOp<fir::AddrOfOp>()) { diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp index ad035e6..0dec5652 100644 --- a/flang/lib/Parser/openacc-parsers.cpp +++ b/flang/lib/Parser/openacc-parsers.cpp @@ -75,21 +75,21 @@ TYPE_PARSER( // tile size is one of: // * (represented as an empty std::optional<ScalarIntExpr>) // constant-int-expr -TYPE_PARSER(construct<AccTileExpr>(scalarIntConstantExpr) || +TYPE_PARSER(sourced(construct<AccTileExpr>(scalarIntConstantExpr) || construct<AccTileExpr>( - "*" >> construct<std::optional<ScalarIntConstantExpr>>())) + "*" >> construct<std::optional<ScalarIntConstantExpr>>()))) TYPE_PARSER(construct<AccTileExprList>(nonemptyList(Parser<AccTileExpr>{}))) // 2.9 (1979-1982) gang-arg is one of : // [num:]int-expr // dim:int-expr // static:size-expr -TYPE_PARSER(construct<AccGangArg>(construct<AccGangArg::Static>( - "STATIC: " >> Parser<AccSizeExpr>{})) || +TYPE_PARSER(sourced(construct<AccGangArg>(construct<AccGangArg::Static>( + "STATIC: " >> Parser<AccSizeExpr>{})) || construct<AccGangArg>( construct<AccGangArg::Dim>("DIM: " >> scalarIntExpr)) || construct<AccGangArg>( - construct<AccGangArg::Num>(maybe("NUM: "_tok) >> scalarIntExpr))) + construct<AccGangArg::Num>(maybe("NUM: "_tok) >> scalarIntExpr)))) // 2.9 gang-arg-list TYPE_PARSER( @@ -101,7 +101,7 @@ TYPE_PARSER(construct<AccCollapseArg>( // 2.5.15 Reduction, F'2023 R1131, and CUF reduction-op // Operator for reduction -TYPE_PARSER(sourced(construct<ReductionOperator>( +TYPE_PARSER(construct<ReductionOperator>( first("+" >> pure(ReductionOperator::Operator::Plus), "*" >> pure(ReductionOperator::Operator::Multiply), "MAX" >> pure(ReductionOperator::Operator::Max), @@ -112,32 +112,32 @@ TYPE_PARSER(sourced(construct<ReductionOperator>( ".AND." >> pure(ReductionOperator::Operator::And), ".OR." >> pure(ReductionOperator::Operator::Or), ".EQV." >> pure(ReductionOperator::Operator::Eqv), - ".NEQV." >> pure(ReductionOperator::Operator::Neqv))))) + ".NEQV." >> pure(ReductionOperator::Operator::Neqv)))) // 2.15.1 Bind clause -TYPE_PARSER(sourced(construct<AccBindClause>(name)) || - sourced(construct<AccBindClause>(scalarDefaultCharExpr))) +TYPE_PARSER(sourced(construct<AccBindClause>(name) || + construct<AccBindClause>(scalarDefaultCharExpr))) // 2.5.16 Default clause -TYPE_PARSER(construct<AccDefaultClause>( +TYPE_PARSER(sourced(construct<AccDefaultClause>( first("NONE" >> pure(llvm::acc::DefaultValue::ACC_Default_none), - "PRESENT" >> pure(llvm::acc::DefaultValue::ACC_Default_present)))) + "PRESENT" >> pure(llvm::acc::DefaultValue::ACC_Default_present))))) // SELF clause is either a simple optional condition for compute construct // or a synonym of the HOST clause for the update directive 2.14.4 holding // an object list. 
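NOTE (reviewer sketch): the recurring edit in this parser file hoists sourced() from the individual alternatives to the whole alternation, so every arm -- including recovery arms like the one in the SELF clause parser below -- records its source range exactly once. Using the bind-clause hunk above as the canonical shape:

    // Before: each alternative wrapped separately (and some not at all).
    TYPE_PARSER(sourced(construct<AccBindClause>(name)) ||
        sourced(construct<AccBindClause>(scalarDefaultCharExpr)))
    // After: one sourced() covering the alternation.
    TYPE_PARSER(sourced(construct<AccBindClause>(name) ||
        construct<AccBindClause>(scalarDefaultCharExpr)))

These ranges are what the new currScope().AddSourceRange(x.source) calls in resolve-names.cpp further down in this patch rely on.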
-TYPE_PARSER( +TYPE_PARSER(sourced( construct<AccSelfClause>(Parser<AccObjectList>{}) / lookAhead(")"_tok) || - construct<AccSelfClause>(scalarLogicalExpr / lookAhead(")"_tok)) || + construct<AccSelfClause>(scalarLogicalExpr) / lookAhead(")"_tok) || construct<AccSelfClause>( recovery(fail<std::optional<ScalarLogicalExpr>>( "logical expression or object list expected"_err_en_US), - SkipTo<')'>{} >> pure<std::optional<ScalarLogicalExpr>>()))) + SkipTo<')'>{} >> pure<std::optional<ScalarLogicalExpr>>())))) // Modifier for copyin, copyout, cache and create -TYPE_PARSER(construct<AccDataModifier>( +TYPE_PARSER(sourced(construct<AccDataModifier>( first("ZERO:" >> pure(AccDataModifier::Modifier::Zero), - "READONLY:" >> pure(AccDataModifier::Modifier::ReadOnly)))) + "READONLY:" >> pure(AccDataModifier::Modifier::ReadOnly))))) // Combined directives TYPE_PARSER(sourced(construct<AccCombinedDirective>( @@ -166,14 +166,13 @@ TYPE_PARSER(sourced(construct<AccStandaloneDirective>( TYPE_PARSER(sourced(construct<AccLoopDirective>( first("LOOP" >> pure(llvm::acc::Directive::ACCD_loop))))) -TYPE_PARSER(construct<AccBeginLoopDirective>( - sourced(Parser<AccLoopDirective>{}), Parser<AccClauseList>{})) +TYPE_PARSER(sourced(construct<AccBeginLoopDirective>( + Parser<AccLoopDirective>{}, Parser<AccClauseList>{}))) TYPE_PARSER(construct<AccEndLoop>("END LOOP"_tok)) TYPE_PARSER(construct<OpenACCLoopConstruct>( - sourced(Parser<AccBeginLoopDirective>{} / endAccLine), - maybe(Parser<DoConstruct>{}), + Parser<AccBeginLoopDirective>{} / endAccLine, maybe(Parser<DoConstruct>{}), maybe(startAccLine >> Parser<AccEndLoop>{} / endAccLine))) // 2.15.1 Routine directive @@ -186,8 +185,8 @@ TYPE_PARSER(sourced( parenthesized(Parser<AccObjectListWithModifier>{})))) // 2.11 Combined constructs -TYPE_PARSER(construct<AccBeginCombinedDirective>( - sourced(Parser<AccCombinedDirective>{}), Parser<AccClauseList>{})) +TYPE_PARSER(sourced(construct<AccBeginCombinedDirective>( + Parser<AccCombinedDirective>{}, Parser<AccClauseList>{}))) // 2.12 Atomic constructs TYPE_PARSER(construct<AccEndAtomic>(startAccLine >> "END ATOMIC"_tok)) @@ -213,10 +212,10 @@ TYPE_PARSER("ATOMIC" >> statement(assignmentStmt), Parser<AccEndAtomic>{} / endAccLine)) TYPE_PARSER( - sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicRead>{})) || - sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicCapture>{})) || - sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicWrite>{})) || - sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicUpdate>{}))) + sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicRead>{}) || + construct<OpenACCAtomicConstruct>(Parser<AccAtomicCapture>{}) || + construct<OpenACCAtomicConstruct>(Parser<AccAtomicWrite>{}) || + construct<OpenACCAtomicConstruct>(Parser<AccAtomicUpdate>{}))) // 2.13 Declare constructs TYPE_PARSER(sourced(construct<AccDeclarativeDirective>( @@ -250,18 +249,18 @@ TYPE_PARSER(construct<OpenACCBlockConstruct>( pure(llvm::acc::Directive::ACCD_data)))))) // Standalone constructs -TYPE_PARSER(construct<OpenACCStandaloneConstruct>( - sourced(Parser<AccStandaloneDirective>{}), Parser<AccClauseList>{})) +TYPE_PARSER(sourced(construct<OpenACCStandaloneConstruct>( + Parser<AccStandaloneDirective>{}, Parser<AccClauseList>{}))) // Standalone declarative constructs -TYPE_PARSER(construct<OpenACCStandaloneDeclarativeConstruct>( - sourced(Parser<AccDeclarativeDirective>{}), Parser<AccClauseList>{})) +TYPE_PARSER(sourced(construct<OpenACCStandaloneDeclarativeConstruct>( + 
Parser<AccDeclarativeDirective>{}, Parser<AccClauseList>{}))) TYPE_PARSER(startAccLine >> withMessage("expected OpenACC directive"_err_en_US, - first(sourced(construct<OpenACCDeclarativeConstruct>( - Parser<OpenACCStandaloneDeclarativeConstruct>{})), - sourced(construct<OpenACCDeclarativeConstruct>( + sourced(first(construct<OpenACCDeclarativeConstruct>( + Parser<OpenACCStandaloneDeclarativeConstruct>{}), + construct<OpenACCDeclarativeConstruct>( Parser<OpenACCRoutineConstruct>{}))))) TYPE_PARSER(sourced(construct<OpenACCEndConstruct>( @@ -293,9 +292,9 @@ TYPE_PARSER(startAccLine >> "SERIAL"_tok >> maybe("LOOP"_tok) >> pure(llvm::acc::Directive::ACCD_serial_loop)))))) -TYPE_PARSER(construct<OpenACCCombinedConstruct>( - sourced(Parser<AccBeginCombinedDirective>{} / endAccLine), +TYPE_PARSER(sourced(construct<OpenACCCombinedConstruct>( + Parser<AccBeginCombinedDirective>{} / endAccLine, maybe(Parser<DoConstruct>{}), - maybe(Parser<AccEndCombinedDirective>{} / endAccLine))) + maybe(Parser<AccEndCombinedDirective>{} / endAccLine)))) } // namespace Fortran::parser diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index 66e5b2c..df0372b 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -140,17 +140,9 @@ void Prescanner::Statement() { CHECK(*at_ == '!'); } std::optional<int> condOffset; - if (InOpenMPConditionalLine()) { + if (InOpenMPConditionalLine()) { // !$ condOffset = 2; - } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' && - directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' && - directiveSentinel_[4] == '\0') { - // CUDA conditional compilation line. - condOffset = 5; - } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'a' && - directiveSentinel_[2] == 'c' && directiveSentinel_[3] == 'c' && - directiveSentinel_[4] == '\0') { - // OpenACC conditional compilation line. + } else if (InOpenACCOrCUDAConditionalLine()) { // !@acc or !@cuf condOffset = 5; } if (condOffset && !preprocessingOnly_) { @@ -166,7 +158,8 @@ void Prescanner::Statement() { } else { // Compiler directive. Emit normalized sentinel, squash following spaces. // Conditional compilation lines (!$) take this path in -E mode too - // so that -fopenmp only has to appear on the later compilation. + // so that -fopenmp only has to appear on the later compilation + // (ditto for !@cuf and !@acc). EmitChar(tokens, '!'); ++at_, ++column_; for (const char *sp{directiveSentinel_}; *sp != '\0'; @@ -202,7 +195,7 @@ void Prescanner::Statement() { } tokens.CloseToken(); SkipSpaces(); - if (InOpenMPConditionalLine() && inFixedForm_ && !tabInCurrentLine_ && + if (InConditionalLine() && inFixedForm_ && !tabInCurrentLine_ && column_ == 6 && *at_ != '\n') { // !$ 0 - turn '0' into a space // !$ 1 - turn '1' into '&' @@ -347,7 +340,7 @@ void Prescanner::Statement() { while (CompilerDirectiveContinuation(tokens, line.sentinel)) { newlineProvenance = GetCurrentProvenance(); } - if (preprocessingOnly_ && inFixedForm_ && InOpenMPConditionalLine() && + if (preprocessingOnly_ && inFixedForm_ && InConditionalLine() && nextLine_ < limit_) { // In -E mode, when the line after !$ conditional compilation is a // regular fixed form continuation line, append a '&' to the line. 
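NOTE (reviewer sketch, assuming the NUL-terminated sentinel buffer that prescan.h below exposes): the prescanner hunks fold the two open-coded '@cuf'/'@acc' checks into a single predicate plus a shared InConditionalLine(). A standalone equivalent of the new test is:

    // True exactly for the "@acc" (OpenACC) and "@cuf" (CUDA) conditional
    // compilation sentinels; the OpenMP case remains the plain "$" sentinel.
    static bool isAccOrCufSentinel(const char *s) {
      return s && s[0] == '@' &&
             ((s[1] == 'a' && s[2] == 'c' && s[3] == 'c') ||
              (s[1] == 'c' && s[2] == 'u' && s[3] == 'f')) &&
             s[4] == '\0';
    }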
@@ -1360,11 +1353,10 @@ const char *Prescanner::FixedFormContinuationLine(bool atNewline) { features_.IsEnabled(LanguageFeature::OldDebugLines))) && nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && nextLine_[4] == ' '}; - if (InCompilerDirective() && - !(InOpenMPConditionalLine() && !preprocessingOnly_)) { + if (InCompilerDirective() && !(InConditionalLine() && !preprocessingOnly_)) { // !$ under -E is not continued, but deferred to later compilation if (IsFixedFormCommentChar(col1) && - !(InOpenMPConditionalLine() && preprocessingOnly_)) { + !(InConditionalLine() && preprocessingOnly_)) { int j{1}; for (; j < 5; ++j) { char ch{directiveSentinel_[j - 1]}; @@ -1443,7 +1435,7 @@ const char *Prescanner::FreeFormContinuationLine(bool ampersand) { } p = SkipWhiteSpaceIncludingEmptyMacros(p); if (InCompilerDirective()) { - if (InOpenMPConditionalLine()) { + if (InConditionalLine()) { if (preprocessingOnly_) { // in -E mode, don't treat !$ as a continuation return nullptr; diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h index fc38adb..5e74817 100644 --- a/flang/lib/Parser/prescan.h +++ b/flang/lib/Parser/prescan.h @@ -171,7 +171,17 @@ private: bool InOpenMPConditionalLine() const { return directiveSentinel_ && directiveSentinel_[0] == '$' && !directiveSentinel_[1]; - ; + } + bool InOpenACCOrCUDAConditionalLine() const { + return directiveSentinel_ && directiveSentinel_[0] == '@' && + ((directiveSentinel_[1] == 'a' && directiveSentinel_[2] == 'c' && + directiveSentinel_[3] == 'c') || + (directiveSentinel_[1] == 'c' && directiveSentinel_[2] == 'u' && + directiveSentinel_[3] == 'f')) && + directiveSentinel_[4] == '\0'; + } + bool InConditionalLine() const { + return InOpenMPConditionalLine() || InOpenACCOrCUDAConditionalLine(); } bool InFixedFormSource() const { return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective(); diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 4cb0b74..b3fd6c8 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -19,7 +19,6 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/openmp-directive-sets.h" #include "flang/Semantics/semantics.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" using OmpClauseSet = Fortran::common::EnumSet<llvm::omp::Clause, llvm::omp::Clause_enumSize>; diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 33e9ea5..b0c36ec 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -31,15 +31,17 @@ #include <list> #include <map> +namespace Fortran::semantics { + template <typename T> -static Fortran::semantics::Scope *GetScope( - Fortran::semantics::SemanticsContext &context, const T &x) { - std::optional<Fortran::parser::CharBlock> source{GetLastSource(x)}; - return source ? 
&context.FindScope(*source) : nullptr; +static Scope *GetScope(SemanticsContext &context, const T &x) { + if (auto source{GetLastSource(x)}) { + return &context.FindScope(*source); + } else { + return nullptr; + } } -namespace Fortran::semantics { - template <typename T> class DirectiveAttributeVisitor { public: explicit DirectiveAttributeVisitor(SemanticsContext &context) @@ -361,7 +363,7 @@ private: void ResolveAccObject(const parser::AccObject &, Symbol::Flag); Symbol *ResolveAcc(const parser::Name &, Symbol::Flag, Scope &); Symbol *ResolveAcc(Symbol &, Symbol::Flag, Scope &); - Symbol *ResolveName(const parser::Name &, bool parentScope = false); + Symbol *ResolveName(const parser::Name &); Symbol *ResolveFctName(const parser::Name &); Symbol *ResolveAccCommonBlockName(const parser::Name *); Symbol *DeclareOrMarkOtherAccessEntity(const parser::Name &, Symbol::Flag); @@ -1257,31 +1259,22 @@ bool AccAttributeVisitor::Pre(const parser::OpenACCStandaloneConstruct &x) { return true; } -Symbol *AccAttributeVisitor::ResolveName( - const parser::Name &name, bool parentScope) { - Symbol *prev{currScope().FindSymbol(name.source)}; - // Check in parent scope if asked for. - if (!prev && parentScope) { - prev = currScope().parent().FindSymbol(name.source); - } - if (prev != name.symbol) { - name.symbol = prev; - } - return prev; +Symbol *AccAttributeVisitor::ResolveName(const parser::Name &name) { + return name.symbol; } Symbol *AccAttributeVisitor::ResolveFctName(const parser::Name &name) { Symbol *prev{currScope().FindSymbol(name.source)}; - if (!prev || (prev && prev->IsFuncResult())) { + if (prev && prev->IsFuncResult()) { prev = currScope().parent().FindSymbol(name.source); - if (!prev) { - prev = &context_.globalScope().MakeSymbol( - name.source, Attrs{}, ProcEntityDetails{}); - } } - if (prev != name.symbol) { - name.symbol = prev; + if (!prev) { + prev = &*context_.globalScope() + .try_emplace(name.source, ProcEntityDetails{}) + .first->second; } + CHECK(!name.symbol || name.symbol == prev); + name.symbol = prev; return prev; } @@ -1388,9 +1381,8 @@ bool AccAttributeVisitor::Pre(const parser::OpenACCRoutineConstruct &x) { } else { PushContext(verbatim.source, llvm::acc::Directive::ACCD_routine); } - const auto &optName{std::get<std::optional<parser::Name>>(x.t)}; - if (optName) { - if (Symbol *sym = ResolveFctName(*optName)) { + if (const auto &optName{std::get<std::optional<parser::Name>>(x.t)}) { + if (Symbol * sym{ResolveFctName(*optName)}) { Symbol &ultimate{sym->GetUltimate()}; AddRoutineInfoToSymbol(ultimate, x); } else { @@ -1425,7 +1417,7 @@ bool AccAttributeVisitor::Pre(const parser::OpenACCCombinedConstruct &x) { case llvm::acc::Directive::ACCD_kernels_loop: case llvm::acc::Directive::ACCD_parallel_loop: case llvm::acc::Directive::ACCD_serial_loop: - PushContext(combinedDir.source, combinedDir.v); + PushContext(x.source, combinedDir.v); break; default: break; @@ -1706,26 +1698,27 @@ void AccAttributeVisitor::Post(const parser::AccDefaultClause &x) { } } -// For OpenACC constructs, check all the data-refs within the constructs -// and adjust the symbol for each Name if necessary void AccAttributeVisitor::Post(const parser::Name &name) { - auto *symbol{name.symbol}; - if (symbol && WithinConstruct()) { - symbol = &symbol->GetUltimate(); - if (!symbol->owner().IsDerivedType() && !symbol->has<ProcEntityDetails>() && - !symbol->has<SubprogramDetails>() && !IsObjectWithVisibleDSA(*symbol)) { + if (name.symbol && WithinConstruct()) { + const Symbol &symbol{name.symbol->GetUltimate()}; + 
if (!symbol.owner().IsDerivedType() && !symbol.has<ProcEntityDetails>() && + !symbol.has<SubprogramDetails>() && !IsObjectWithVisibleDSA(symbol)) { if (Symbol * found{currScope().FindSymbol(name.source)}) { - if (symbol != found) { - name.symbol = found; // adjust the symbol within region + if (&symbol != found) { + // adjust the symbol within the region + // TODO: why didn't name resolution set the right name originally? + name.symbol = found; } else if (GetContext().defaultDSA == Symbol::Flag::AccNone) { // 2.5.14. context_.Say(name.source, "The DEFAULT(NONE) clause requires that '%s' must be listed in a data-mapping clause"_err_en_US, - symbol->name()); + symbol.name()); } + } else { + // TODO: assertion here? or clear name.symbol? } } - } // within OpenACC construct + } } Symbol *AccAttributeVisitor::ResolveAccCommonBlockName( @@ -1810,13 +1803,11 @@ Symbol *AccAttributeVisitor::ResolveAcc( Symbol *AccAttributeVisitor::DeclareOrMarkOtherAccessEntity( const parser::Name &name, Symbol::Flag accFlag) { - Symbol *prev{currScope().FindSymbol(name.source)}; - if (!name.symbol || !prev) { + if (name.symbol) { + return DeclareOrMarkOtherAccessEntity(*name.symbol, accFlag); + } else { return nullptr; - } else if (prev != name.symbol) { - name.symbol = prev; } - return DeclareOrMarkOtherAccessEntity(*prev, accFlag); } Symbol *AccAttributeVisitor::DeclareOrMarkOtherAccessEntity( @@ -2990,6 +2981,7 @@ void OmpAttributeVisitor::Post(const parser::Name &name) { } Symbol *OmpAttributeVisitor::ResolveName(const parser::Name *name) { + // TODO: why is the symbol not properly resolved by name resolution? if (auto *resolvedSymbol{ name ? GetContext().scope.FindSymbol(name->source) : nullptr}) { name->symbol = resolvedSymbol; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 0af1c94..88cc446 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1441,6 +1441,30 @@ public: void Post(const parser::AccBeginLoopDirective &x) { messageHandler().set_currStmtSource(std::nullopt); } + bool Pre(const parser::OpenACCStandaloneConstruct &x) { + currScope().AddSourceRange(x.source); + return true; + } + bool Pre(const parser::OpenACCCacheConstruct &x) { + currScope().AddSourceRange(x.source); + return true; + } + bool Pre(const parser::OpenACCWaitConstruct &x) { + currScope().AddSourceRange(x.source); + return true; + } + bool Pre(const parser::OpenACCAtomicConstruct &x) { + currScope().AddSourceRange(x.source); + return true; + } + bool Pre(const parser::OpenACCEndConstruct &x) { + currScope().AddSourceRange(x.source); + return true; + } + bool Pre(const parser::OpenACCDeclarativeConstruct &x) { + currScope().AddSourceRange(x.source); + return true; + } void CopySymbolWithDevice(const parser::Name *name); @@ -1480,7 +1504,8 @@ void AccVisitor::CopySymbolWithDevice(const parser::Name *name) { // symbols are created for the one appearing in the use_device // clause. These new symbols have the CUDA Fortran device // attribute. 
- if (context_.languageFeatures().IsEnabled(common::LanguageFeature::CUDA)) { + if (context_.languageFeatures().IsEnabled(common::LanguageFeature::CUDA) && + name->symbol) { name->symbol = currScope().CopySymbol(*name->symbol); if (auto *object{name->symbol->detailsIf<ObjectEntityDetails>()}) { object->set_cudaDataAttr(common::CUDADataAttr::Device); @@ -1490,15 +1515,12 @@ void AccVisitor::CopySymbolWithDevice(const parser::Name *name) { bool AccVisitor::Pre(const parser::AccClause::UseDevice &x) { for (const auto &accObject : x.v.v) { + Walk(accObject); common::visit( common::visitors{ [&](const parser::Designator &designator) { if (const auto *name{ parser::GetDesignatorNameIfDataRef(designator)}) { - Symbol *prev{currScope().FindSymbol(name->source)}; - if (prev != name->symbol) { - name->symbol = prev; - } CopySymbolWithDevice(name); } else { if (const auto *dataRef{ @@ -1507,13 +1529,8 @@ bool AccVisitor::Pre(const parser::AccClause::UseDevice &x) { common::Indirection<parser::ArrayElement>; if (auto *ind{std::get_if<ElementIndirection>(&dataRef->u)}) { const parser::ArrayElement &arrayElement{ind->value()}; - Walk(arrayElement.subscripts); const parser::DataRef &base{arrayElement.base}; if (auto *name{std::get_if<parser::Name>(&base.u)}) { - Symbol *prev{currScope().FindSymbol(name->source)}; - if (prev != name->symbol) { - name->symbol = prev; - } CopySymbolWithDevice(name); } } @@ -1537,6 +1554,7 @@ void AccVisitor::Post(const parser::OpenACCBlockConstruct &x) { bool AccVisitor::Pre(const parser::OpenACCCombinedConstruct &x) { PushScope(Scope::Kind::OpenACCConstruct, nullptr); + currScope().AddSourceRange(x.source); return true; } @@ -5433,7 +5451,8 @@ void SubprogramVisitor::PushBlockDataScope(const parser::Name &name) { } } -// If name is a generic, return specific subprogram with the same name. +// If name is a generic in the same scope, return its specific subprogram with +// the same name, if any. Symbol *SubprogramVisitor::GetSpecificFromGeneric(const parser::Name &name) { // Search for the name but don't resolve it if (auto *symbol{currScope().FindSymbol(name.source)}) { @@ -5443,6 +5462,9 @@ Symbol *SubprogramVisitor::GetSpecificFromGeneric(const parser::Name &name) { // symbol doesn't inherit it and ruin the ability to check it. symbol->attrs().reset(Attr::MODULE); } + } else if (&symbol->owner() != &currScope() && inInterfaceBlock() && + !isGeneric()) { + // non-generic interface shadows outer definition } else if (auto *details{symbol->detailsIf<GenericDetails>()}) { // found generic, want specific procedure auto *specific{details->specific()}; diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index bdb5377..2606d99 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -452,6 +452,15 @@ void SemanticsContext::UpdateScopeIndex( } } +void SemanticsContext::DumpScopeIndex(llvm::raw_ostream &out) const { + out << "scopeIndex_:\n"; + for (const auto &[source, scope] : scopeIndex_) { + out << "source '" << source.ToString() << "' -> scope " << scope + << "... 
whose source range is '" << scope.sourceRange().ToString() + << "'\n"; + } +} + bool SemanticsContext::IsInModuleFile(parser::CharBlock source) const { for (const Scope *scope{&FindScope(source)}; !scope->IsGlobal(); scope = &scope->parent()) { diff --git a/flang/lib/Utils/OpenMP.cpp b/flang/lib/Utils/OpenMP.cpp index 2261912..15a42c3 100644 --- a/flang/lib/Utils/OpenMP.cpp +++ b/flang/lib/Utils/OpenMP.cpp @@ -22,8 +22,9 @@ mlir::omp::MapInfoOp createMapInfoOp(mlir::OpBuilder &builder, mlir::Location loc, mlir::Value baseAddr, mlir::Value varPtrPtr, llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds, llvm::ArrayRef<mlir::Value> members, mlir::ArrayAttr membersIndex, - uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType, - mlir::Type retTy, bool partialMap, mlir::FlatSymbolRefAttr mapperId) { + mlir::omp::ClauseMapFlags mapType, + mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, + bool partialMap, mlir::FlatSymbolRefAttr mapperId) { if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(baseAddr.getType())) { baseAddr = fir::BoxAddrOp::create(builder, loc, baseAddr); @@ -42,7 +43,7 @@ mlir::omp::MapInfoOp createMapInfoOp(mlir::OpBuilder &builder, mlir::omp::MapInfoOp op = mlir::omp::MapInfoOp::create(builder, loc, retTy, baseAddr, varType, - builder.getIntegerAttr(builder.getIntegerType(64, false), mapType), + builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(mapType), builder.getAttr<mlir::omp::VariableCaptureKindAttr>(mapCaptureType), varPtrPtr, members, membersIndex, bounds, mapperId, builder.getStringAttr(name), builder.getBoolAttr(partialMap)); @@ -75,8 +76,7 @@ mlir::Value mapTemporaryValue(fir::FirOpBuilder &firOpBuilder, firOpBuilder.setInsertionPoint(targetOp); - llvm::omp::OpenMPOffloadMappingFlags mapFlag = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::ClauseMapFlags mapFlag = mlir::omp::ClauseMapFlags::implicit; mlir::omp::VariableCaptureKind captureKind = mlir::omp::VariableCaptureKind::ByRef; @@ -88,16 +88,14 @@ mlir::Value mapTemporaryValue(fir::FirOpBuilder &firOpBuilder, if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } else if (!fir::isa_builtin_cptr_type(eleType)) { - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= mlir::omp::ClauseMapFlags::to; } mlir::Value mapOp = createMapInfoOp(firOpBuilder, copyVal.getLoc(), copyVal, /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, /*members=*/llvm::SmallVector<mlir::Value>{}, - /*membersIndex=*/mlir::ArrayAttr{}, - static_cast<std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - mapFlag), - captureKind, copyVal.getType()); + /*membersIndex=*/mlir::ArrayAttr{}, mapFlag, captureKind, + copyVal.getType()); auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp); mlir::Region ®ion = targetOp.getRegion(); diff --git a/flang/test/Fir/CUDA/cuda-global-addr.mlir b/flang/test/Fir/CUDA/cuda-global-addr.mlir index 3e50c7a..6f7816c 100644 --- a/flang/test/Fir/CUDA/cuda-global-addr.mlir +++ b/flang/test/Fir/CUDA/cuda-global-addr.mlir @@ -63,6 +63,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : // We cannot call _FortranACUFGetDeviceAddress on a constant global. // There is no symbol for it and the call would result into an unresolved reference. 
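NOTE (reviewer sketch): an unanchored `CHECK-NOT: fir.call {{.*}}GetDeviceAddress` is checked against the whole FileCheck output, so it over-constrains unrelated functions and gives no hint which one regressed. The label added below pins the negative check to this function's output:

    // CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "arraysize"}
    // CHECK-NOT: fir.call {{.*}}GetDeviceAddress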
+// CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "arraysize"} // CHECK-NOT: fir.call {{.*}}GetDeviceAddress // ----- @@ -90,3 +91,22 @@ func.func @_QQmain() attributes {fir.bindc_name = "test"} { // CHECK-NOT: fir.call {{.*}}GetDeviceAddress } + +// ----- + +// Check that we do not introduce call to _FortranACUFGetDeviceAddress when the +// value has no user. + +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + func.func @_QQmain() attributes {fir.bindc_name = "T"} { + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.address_of(@_QMcon2Ezzz) : !fir.ref<i32> + %2 = fir.declare %1 {data_attr = #cuf.cuda<constant>, uniq_name = "_QMcon2Ezzz"} : (!fir.ref<i32>) -> !fir.ref<i32> + return + } + fir.global @_QMcon2Ezzz {data_attr = #cuf.cuda<constant>} : i32 +} + +// CHECK-LABEL: func.func @_QQmain() +// CHECK: fir.address_of(@_QMcon2Ezzz) : !fir.ref<i32> +// CHECK-NOT: fir.call {{.*}}GetDeviceAddress diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-class.f90 b/flang/test/Fir/OpenACC/openacc-type-categories-class.f90 index e8951cc..ec97114 100644 --- a/flang/test/Fir/OpenACC/openacc-type-categories-class.f90 +++ b/flang/test/Fir/OpenACC/openacc-type-categories-class.f90 @@ -43,4 +43,4 @@ end module ! TODO: After using select type - the appropriate type category should be ! possible. Add the rest of the test once OpenACC lowering correctly handles -! unlimited polymorhic. +! unlimited polymorphic. 
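NOTE (reviewer sketch): the test updates that follow are mechanical fallout of the attribute change. The omp.map.info assembly now prints the OpenMP 6.0 spelling `storage` where the old integer encoding printed `exit_release_or_enter_alloc`, and implicit-only maps stop printing a storage token altogether:

    map_clauses(always, exit_release_or_enter_alloc)   -->  map_clauses(always, storage)
    map_clauses(implicit, exit_release_or_enter_alloc) -->  map_clauses(implicit)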
diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 38d5111..30ed2f0 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -252,7 +252,7 @@ func.func @_QPomp_target_data() { %c0_6 = arith.constant 0 : index %10 = arith.subi %c1024_1, %c1_5 : index %11 = omp.map.bounds lower_bound(%c0_6 : index) upper_bound(%10 : index) extent(%c1024_1 : index) stride(%c1_5 : index) start_idx(%c1_5 : index) - %12 = omp.map.info var_ptr(%2 : !fir.ref<!fir.array<1024xi32>>, !fir.array<1024xi32>) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) bounds(%11) -> !fir.ref<!fir.array<1024xi32>> {name = "c"} + %12 = omp.map.info var_ptr(%2 : !fir.ref<!fir.array<1024xi32>>, !fir.array<1024xi32>) map_clauses(always, storage) capture(ByRef) bounds(%11) -> !fir.ref<!fir.array<1024xi32>> {name = "c"} omp.target_enter_data map_entries(%6, %9, %12 : !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>) %c1_7 = arith.constant 1 : index %c0_8 = arith.constant 0 : index @@ -268,7 +268,7 @@ func.func @_QPomp_target_data() { %c0_12 = arith.constant 0 : index %19 = arith.subi %c1024_1, %c1_11 : index %20 = omp.map.bounds lower_bound(%c0_12 : index) upper_bound(%19 : index) extent(%c1024_1 : index) stride(%c1_11 : index) start_idx(%c1_11 : index) - %21 = omp.map.info var_ptr(%2 : !fir.ref<!fir.array<1024xi32>>, !fir.array<1024xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%20) -> !fir.ref<!fir.array<1024xi32>> {name = "c"} + %21 = omp.map.info var_ptr(%2 : !fir.ref<!fir.array<1024xi32>>, !fir.array<1024xi32>) map_clauses(storage) capture(ByRef) bounds(%20) -> !fir.ref<!fir.array<1024xi32>> {name = "c"} %c1_13 = arith.constant 1 : index %c0_14 = arith.constant 0 : index %22 = arith.subi %c1024_2, %c1_13 : index @@ -305,7 +305,7 @@ func.func @_QPomp_target_data() { // CHECK: %[[VAL_23:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[VAL_24:.*]] = llvm.mlir.constant(1023 : index) : i64 // CHECK: %[[VAL_25:.*]] = omp.map.bounds lower_bound(%[[VAL_23]] : i64) upper_bound(%[[VAL_24]] : i64) extent(%[[VAL_10]] : i64) stride(%[[VAL_22]] : i64) start_idx(%[[VAL_22]] : i64) -// CHECK: %[[VAL_26:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) bounds(%[[VAL_25]]) -> !llvm.ptr {name = "c"} +// CHECK: %[[VAL_26:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(always, storage) capture(ByRef) bounds(%[[VAL_25]]) -> !llvm.ptr {name = "c"} // CHECK: omp.target_enter_data map_entries(%[[VAL_16]], %[[VAL_21]], %[[VAL_26]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) // CHECK: %[[VAL_27:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[VAL_28:.*]] = llvm.mlir.constant(0 : index) : i64 @@ -321,7 +321,7 @@ func.func @_QPomp_target_data() { // CHECK: %[[VAL_38:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[VAL_39:.*]] = llvm.mlir.constant(1023 : index) : i64 // CHECK: %[[VAL_40:.*]] = omp.map.bounds lower_bound(%[[VAL_38]] : i64) upper_bound(%[[VAL_39]] : i64) extent(%[[VAL_10]] : i64) stride(%[[VAL_37]] : i64) start_idx(%[[VAL_37]] : i64) -// CHECK: %[[VAL_41:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%[[VAL_40]]) -> !llvm.ptr {name = "c"} +// CHECK: %[[VAL_41:.*]] = omp.map.info var_ptr(%[[VAL_4]] : 
!llvm.ptr, !llvm.array<1024 x i32>) map_clauses(storage) capture(ByRef) bounds(%[[VAL_40]]) -> !llvm.ptr {name = "c"} // CHECK: %[[VAL_42:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[VAL_43:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[VAL_44:.*]] = llvm.mlir.constant(1023 : index) : i64 diff --git a/flang/test/Lower/OpenACC/acc-declare-common-in-function.f90 b/flang/test/Lower/OpenACC/acc-declare-common-in-function.f90 new file mode 100644 index 0000000..5038f71 --- /dev/null +++ b/flang/test/Lower/OpenACC/acc-declare-common-in-function.f90 @@ -0,0 +1,40 @@ +! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s + +! Verify that a COMMON block declared with OpenACC declare inside a function +! is lowered as a global declare (acc.global_ctor/dtor) rather than a +! structured declare. + +program p + implicit none + real :: pi + integer :: i + common /COM/ pi +!$acc declare copyin(/COM/) + data pi/0.0/ + +! CHECK-DAG: acc.global_ctor @{{.*}}_acc_ctor { +! CHECK-DAG: %[[ADDR0:.*]] = fir.address_of(@{{.*}}) {acc.declare = #acc.declare<dataClause = acc_copyin>} : {{.*}} +! CHECK-DAG: acc.declare_enter dataOperands(%{{.*}} : {{.*}}) +! CHECK-DAG: acc.terminator +! CHECK-DAG: } + +! CHECK-DAG: acc.global_dtor @{{.*}}_acc_dtor { +! CHECK-DAG: %[[ADDR1:.*]] = fir.address_of(@{{.*}}) {acc.declare = #acc.declare<dataClause = acc_copyin>} : !fir.ref<tuple<f32>> +! CHECK-DAG: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[ADDR1]] : !fir.ref<tuple<f32>>) -> !fir.ref<tuple<f32>> {dataClause = #acc<data_clause acc_copyin>, {{.*}}} +! CHECK-DAG: acc.declare_exit dataOperands(%[[GDP]] : !fir.ref<tuple<f32>>) +! CHECK-DAG: acc.delete accPtr(%[[GDP]] : !fir.ref<tuple<f32>>) {dataClause = #acc<data_clause acc_copyin>{{.*}}} +! CHECK-DAG: acc.terminator +! 
CHECK-DAG: } + +contains + + subroutine s() + implicit none + real :: pi + common /COM/ pi +!$acc declare copyin(/COM/) + end subroutine s + +end program p + + diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90 index 39f9738..126f341 100644 --- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90 +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90 @@ -23,9 +23,9 @@ program test_default_implicit_firstprivate !CHECK: %[[VAL_4:.*]] = fir.declare %{{.*}} {uniq_name = "_QFEk"} : (!fir.ref<i32>) -> !fir.ref<i32> !CHECK: %[[VAL_5:.*]] = fir.declare %{{.*}} {uniq_name = "_QFExdgfx"} : (!fir.ref<i32>) -> !fir.ref<i32> !CHECK: %[[VAL_6:.*]] = fir.declare %{{.*}} {uniq_name = "_QFExfpvx"} : (!fir.ref<i32>) -> !fir.ref<i32> -!CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "i"} -!CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_3]] : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "j"} -!CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "k"} +!CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "i"} +!CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_3]] : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "j"} +!CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "k"} !CHECK: %[[VAL_10:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xi32>>> !CHECK: %[[VAL_11:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xi32>>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_10]] : !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xi32>>>) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xi32>>> {name = ""} !CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xi32>>>>, !fir.box<!fir.heap<!fir.array<?x?x?xi32>>>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_11]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xi32>>>> {name = "allocarr"} diff --git a/flang/test/Lower/OpenMP/common-block-map.f90 b/flang/test/Lower/OpenMP/common-block-map.f90 index a0a1b1f..7c690c9 100644 --- a/flang/test/Lower/OpenMP/common-block-map.f90 +++ b/flang/test/Lower/OpenMP/common-block-map.f90 @@ -36,7 +36,7 @@ end !CHECK: %[[CONV:.*]] = fir.convert %[[COORD]] : (!fir.ref<i8>) -> !fir.ref<i32> !CHECK: %[[CB_MEMBER_2:.*]]:2 = hlfir.declare %[[CONV]] storage(%[[COMMON_BLOCK]][4]) {uniq_name = "_QFmap_mix_of_membersEvar2"} : (!fir.ref<i32>, !fir.ref<!fir.array<8xi8>>) -> (!fir.ref<i32>, !fir.ref<i32>) !CHECK: %[[MAP_EXP:.*]] = omp.map.info var_ptr(%[[CB_MEMBER_2]]#1 : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "var2"} -!CHECK: %[[MAP_IMP:.*]] = omp.map.info var_ptr(%[[CB_MEMBER_1]]#1 : !fir.ref<i32>, i32) 
map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "var1"} +!CHECK: %[[MAP_IMP:.*]] = omp.map.info var_ptr(%[[CB_MEMBER_1]]#1 : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "var1"} !CHECK: omp.target map_entries(%[[MAP_EXP]] -> %[[ARG_EXP:.*]], %[[MAP_IMP]] -> %[[ARG_IMP:.*]] : !fir.ref<i32>, !fir.ref<i32>) { !CHECK: %[[EXP_MEMBER:.*]]:2 = hlfir.declare %[[ARG_EXP]] {uniq_name = "_QFmap_mix_of_membersEvar2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) !CHECK: %[[IMP_MEMBER:.*]]:2 = hlfir.declare %[[ARG_IMP]] {uniq_name = "_QFmap_mix_of_membersEvar1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) diff --git a/flang/test/Lower/OpenMP/declare-mapper.f90 b/flang/test/Lower/OpenMP/declare-mapper.f90 index 3d4d0da..c389d0f 100644 --- a/flang/test/Lower/OpenMP/declare-mapper.f90 +++ b/flang/test/Lower/OpenMP/declare-mapper.f90 @@ -80,7 +80,7 @@ subroutine declare_mapper_2 !CHECK: %[[VAL_8:.*]] = omp.map.bounds lower_bound(%[[VAL_6]] : index) upper_bound(%[[VAL_7]] : index) extent(%[[VAL_2]] : index) stride(%[[VAL_5]] : index) start_idx(%[[VAL_5]] : index) !CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !fir.ref<!fir.array<250xf32>>, !fir.array<250xf32>) map_clauses(tofrom) capture(ByRef) bounds(%[[VAL_8]]) -> !fir.ref<!fir.array<250xf32>> {name = "v%[[VAL_10:.*]]"} !CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_1]]#0{"temp"} : (!fir.ref<[[MY_TYPE]]>) -> !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> - !CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>, !fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> {name = "v%[[VAL_13:.*]]"} + !CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>, !fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>) map_clauses(storage) capture(ByRef) -> !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> {name = "v%[[VAL_13:.*]]"} !CHECK: %[[VAL_14:.*]] = omp.map.info var_ptr(%[[VAL_1]]#1 : !fir.ref<[[MY_TYPE]]>, [[MY_TYPE]]) map_clauses(tofrom) capture(ByRef) members(%[[VAL_9]], %[[VAL_12]] : [3], [1] : !fir.ref<!fir.array<250xf32>>, !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<[[MY_TYPE]]> {name = "v", partial_map = true} !CHECK: omp.declare_mapper.info map_entries(%[[VAL_14]], %[[VAL_9]], %[[VAL_12]] : !fir.ref<[[MY_TYPE]]>, !fir.ref<!fir.array<250xf32>>, !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) !CHECK: } diff --git a/flang/test/Lower/OpenMP/defaultmap.f90 b/flang/test/Lower/OpenMP/defaultmap.f90 index 0b26f5d..b9c902f 100644 --- a/flang/test/Lower/OpenMP/defaultmap.f90 +++ b/flang/test/Lower/OpenMP/defaultmap.f90 @@ -5,7 +5,7 @@ subroutine defaultmap_allocatable_present() implicit none integer, dimension(:), allocatable :: arr -! 
CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(implicit, present, exit_release_or_enter_alloc) capture(ByRef) var_ptr_ptr({{.*}}) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +! CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(implicit, present) capture(ByRef) var_ptr_ptr({{.*}}) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} ! CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members({{.*}}) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "arr"} !$omp target defaultmap(present: allocatable) arr(1) = 10 @@ -32,7 +32,7 @@ subroutine defaultmap_all_default() integer :: aggregate(16) integer :: scalar_int -! CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "scalar_int"} +! CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "scalar_int"} ! CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr({{.*}}) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} ! CHECK: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members({{.*}}) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "arr"} ! CHECK: %[[MAP_4:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.array<16xi32>>, !fir.array<16xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{.*}}) -> !fir.ref<!fir.array<16xi32>> {name = "aggregate"} @@ -54,7 +54,7 @@ subroutine defaultmap_pointer_to() ! CHECK-FPRIV: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, i32) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr({{.*}}) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} ! CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.box<!fir.ptr<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members({{.*}}) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {name = "arr_ptr"} ! CHECK-FPRIV: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(to) capture(ByCopy) -> !fir.ref<i32> -! CHECK-NO-FPRIV: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "scalar_int"} +! CHECK-NO-FPRIV: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "scalar_int"} !$omp target defaultmap(to: pointer) arr_ptr(1) = scalar_int + 20 !$omp end target diff --git a/flang/test/Lower/OpenMP/has_device_addr-mapinfo.f90 b/flang/test/Lower/OpenMP/has_device_addr-mapinfo.f90 index 8d8c043..e7bced4 100644 --- a/flang/test/Lower/OpenMP/has_device_addr-mapinfo.f90 +++ b/flang/test/Lower/OpenMP/has_device_addr-mapinfo.f90 @@ -17,7 +17,7 @@ end ! Check that the map.info for `a` only takes a single parameter. 
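! (The raw map_type bitmask and the new clause_map_flags enum agree here:
! 517 = 0x205 = 0x200 | 0x4 | 0x1, i.e. implicit | always | to, and
! 2 = 0x2, i.e. from, matching the libomptarget map-type bit values.)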
-!CHECK-DAG: %[[MAP_A:[0-9]+]] = "omp.map.info"(%[[STORAGE_A:[0-9#]+]]) <{map_capture_type = #omp<variable_capture_kind(ByRef)>, map_type = 517 : ui64, name = "a", operandSegmentSizes = array<i32: 1, 0, 0, 0>, partial_map = false, var_type = !fir.box<!fir.array<?xi32>>}> : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> -!CHECK-DAG: %[[MAP_T:[0-9]+]] = "omp.map.info"(%[[STORAGE_T:[0-9#]+]]) <{map_capture_type = #omp<variable_capture_kind(ByRef)>, map_type = 2 : ui64, name = "t", operandSegmentSizes = array<i32: 1, 0, 0, 0>, partial_map = false, var_type = i32}> : (!fir.ref<i32>) -> !fir.ref<i32> +!CHECK-DAG: %[[MAP_A:[0-9]+]] = "omp.map.info"(%[[STORAGE_A:[0-9#]+]]) <{map_capture_type = #omp<variable_capture_kind(ByRef)>, map_type = #omp<clause_map_flags to|always|implicit>, name = "a", operandSegmentSizes = array<i32: 1, 0, 0, 0>, partial_map = false, var_type = !fir.box<!fir.array<?xi32>>}> : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> +!CHECK-DAG: %[[MAP_T:[0-9]+]] = "omp.map.info"(%[[STORAGE_T:[0-9#]+]]) <{map_capture_type = #omp<variable_capture_kind(ByRef)>, map_type = #omp<clause_map_flags from>, name = "t", operandSegmentSizes = array<i32: 1, 0, 0, 0>, partial_map = false, var_type = i32}> : (!fir.ref<i32>) -> !fir.ref<i32> !CHECK: "omp.target"(%[[MAP_A]], %[[MAP_T]]) diff --git a/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90 b/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90 index ab2cdf3..76dba67 100644 --- a/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90 +++ b/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90 @@ -11,7 +11,7 @@ !HLFIRDIALECT: %[[B_DECLARE:.*]]:2 = hlfir.declare %[[B_ALLOCA]](%[[B_SHAPE]]) {uniq_name = "_QFlocal_variable_intrinsic_sizeEb"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) !HLFIRDIALECT: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}} : index) upper_bound({{.*}} : index) extent({{.*}} : index) stride({{.*}} : index) start_idx({{.*}} : index) {stride_in_bytes = true} !HLFIRDIALECT: %[[MAP_DATA_B:.*]] = omp.map.info var_ptr(%[[B_DECLARE]]#1 : !fir.ref<!fir.array<?xf32>>, f32) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xf32>> {name = "b"} -!HLFIRDIALECT: %[[MAP_DATA_SZ:.*]] = omp.map.info var_ptr(%[[SZ_DATA]] : !fir.ref<index>, index) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<index> {name = ""} +!HLFIRDIALECT: %[[MAP_DATA_SZ:.*]] = omp.map.info var_ptr(%[[SZ_DATA]] : !fir.ref<index>, index) map_clauses(implicit) capture(ByCopy) -> !fir.ref<index> {name = ""} !HLFIRDIALECT: omp.target map_entries(%[[MAP_DATA_B]] -> %[[ARG1:.*]], %[[MAP_DATA_SZ]] -> %[[ARG2:.*]] : !fir.ref<!fir.array<?xf32>>, !fir.ref<index>) { !HLFIRDIALECT: %[[SZ_LD:.*]] = fir.load %[[ARG2]] : !fir.ref<index> !HLFIRDIALECT: %[[SZ_CONV:.*]] = fir.convert %[[SZ_LD]] : (index) -> i64 diff --git a/flang/test/Lower/OpenMP/optional-argument-map-2.f90 b/flang/test/Lower/OpenMP/optional-argument-map-2.f90 index a774407..791d509 100644 --- a/flang/test/Lower/OpenMP/optional-argument-map-2.f90 +++ b/flang/test/Lower/OpenMP/optional-argument-map-2.f90 @@ -96,7 +96,7 @@ end module mod ! CHECK-NO-FPRIV: } ! CHECK-NO-FPRIV: %[[VAL_13:.*]] = arith.subi %[[VAL_14:.*]]#0, %[[VAL_10]] : index ! 
CHECK-NO-FPRIV: %[[VAL_15:.*]] = omp.map.bounds lower_bound(%[[VAL_9]] : index) upper_bound(%[[VAL_13]] : index) extent(%[[VAL_14]]#0 : index) stride(%[[VAL_14]]#1 : index) start_idx(%[[VAL_9]] : index) {stride_in_bytes = true} -! CHECK-NO-FPRIV: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref<!fir.char<1,?>>, !fir.char<1,?>) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) bounds(%[[VAL_15]]) -> !fir.ref<!fir.char<1,?>> {name = "a"} +! CHECK-NO-FPRIV: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref<!fir.char<1,?>>, !fir.char<1,?>) map_clauses(implicit) capture(ByCopy) bounds(%[[VAL_15]]) -> !fir.ref<!fir.char<1,?>> {name = "a"} ! CHECK-NO-FPRIV: fir.store %[[ARG0]] to %[[VAL_0]] : !fir.ref<!fir.boxchar<1>> ! CHECK-NO-FPRIV: %[[VAL_17:.*]] = arith.constant 0 : index ! CHECK-NO-FPRIV: %[[VAL_18:.*]] = arith.constant 1 : index diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 1aef64a..26bd62e 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -69,7 +69,7 @@ subroutine omp_target_enter_mt !CHECK: %[[BOUNDS_1:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) !CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr(%{{.*}}) map_clauses(to) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.ref<!fir.array<1024xi32>> {name = "b"} !CHECK: %[[BOUNDS_2:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) - !CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.array<1024xi32>> {name = "c"} + !CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(always, storage) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.array<1024xi32>> {name = "c"} !CHECK: %[[BOUNDS_3:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) !CHECK: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(to) capture(ByRef) bounds(%[[BOUNDS_3]]) -> !fir.ref<!fir.array<1024xi32>> {name = "d"} !CHECK: omp.target_enter_data map_entries(%[[MAP_0]], %[[MAP_1]], %[[MAP_2]], %[[MAP_3]] : !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>) @@ -150,9 +150,9 @@ subroutine omp_target_exit_mt !CHECK: %[[BOUNDS_1:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) !CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(from) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.ref<!fir.array<1024xi32>> {name = "b"} !CHECK: %[[BOUNDS_2:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) - !CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.array<1024xi32>> {name = "c"} + !CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(storage) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.array<1024xi32>> {name = "c"} !CHECK: %[[BOUNDS_3:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) - !CHECK: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(always, delete) capture(ByRef) bounds(%[[BOUNDS_3]]) -> !fir.ref<!fir.array<1024xi32>> {name = "d"} + !CHECK: %[[MAP_3:.*]] = omp.map.info 
var_ptr({{.*}}) map_clauses(always, delete, storage) capture(ByRef) bounds(%[[BOUNDS_3]]) -> !fir.ref<!fir.array<1024xi32>> {name = "d"} !CHECK: %[[BOUNDS_4:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) !CHECK: %[[MAP_4:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(from) capture(ByRef) bounds(%[[BOUNDS_4]]) -> !fir.ref<!fir.array<1024xi32>> {name = "e"} !CHECK: omp.target_exit_data map_entries(%[[MAP_0]], %[[MAP_1]], %[[MAP_2]], %[[MAP_3]], %[[MAP_4]] : !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>) @@ -482,7 +482,7 @@ subroutine omp_target_implicit_bounds(n) integer :: a(n) !CHECK: %[[VAL_14:.*]] = omp.map.bounds lower_bound(%c0{{.*}} : index) upper_bound(%[[UB]] : index) extent(%[[VAL_7]] : index) stride(%c1{{.*}} : index) start_idx(%c1{{.*}} : index) !CHECK: %[[VAL_15:.*]] = omp.map.info var_ptr(%[[VAL_10]]#1 : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[VAL_14]]) -> !fir.ref<!fir.array<?xi32>> {name = "a"} - !CHECK: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_COPY]] : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = ""} + !CHECK: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_COPY]] : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = ""} !CHECK: omp.target map_entries(%[[VAL_15]] -> %[[VAL_17:.*]], %[[VAL_16]] -> %[[VAL_18:.*]] : !fir.ref<!fir.array<?xi32>>, !fir.ref<i32>) { !$omp target !CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32> @@ -642,8 +642,8 @@ subroutine target_unstructured integer :: i = 1 !CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtarget_unstructuredEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) integer :: j = 11 - !CHECK-NO-FPRIV: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]]#1 : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "i"} - !CHECK-NO-FPRIV: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "j"} + !CHECK-NO-FPRIV: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]]#1 : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "i"} + !CHECK-NO-FPRIV: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "j"} !CHECK-NO-FPRIV: omp.target map_entries(%[[VAL_4]] -> %[[VAL_6:.*]], %[[VAL_5]] -> %[[VAL_7:.*]] : !fir.ref<i32>, !fir.ref<i32>) { !CHECK-FPRIV: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]]#0 : !fir.ref<i32>, i32) map_clauses(to) capture(ByCopy) -> !fir.ref<i32> !CHECK-FPRIV: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_3]]#0 : !fir.ref<i32>, i32) map_clauses(to) capture(ByCopy) -> !fir.ref<i32> diff --git a/flang/test/Preprocessing/bug164470.cuf b/flang/test/Preprocessing/bug164470.cuf new file mode 100644 index 0000000..3e959f4 --- /dev/null +++ b/flang/test/Preprocessing/bug164470.cuf @@ -0,0 +1,6 @@ +!RUN: %flang_fc1 -x cuda -fdebug-unparse %s 2>&1 | FileCheck %s +!CHECK: ATTRIBUTES(DEVICE) FUNCTION foo() +!@cuf attributes(device) & +function foo() + foo = 1. 
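+! The '&' above continues the !@cuf directive onto the next source line, so
+! under -x cuda the two lines must fuse into a single device function
+! statement; the CHECK line verifies the unparsed result.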
+end diff --git a/flang/test/Semantics/OpenACC/bug1583.f90 b/flang/test/Semantics/OpenACC/bug1583.f90 new file mode 100644 index 0000000..7778d46 --- /dev/null +++ b/flang/test/Semantics/OpenACC/bug1583.f90 @@ -0,0 +1,23 @@ +! RUN: %python %S/../test_symbols.py %s %flang_fc1 -fopenacc +!DEF: /m Module +module m + !DEF: /m/t PUBLIC DerivedType + type :: t + !DEF: /m/t/c ALLOCATABLE ObjectEntity REAL(4) + real, allocatable :: c(:) + end type +contains + !DEF: /m/sub PUBLIC (Subroutine) Subprogram + !DEF: /m/sub/v ObjectEntity TYPE(t) + subroutine sub (v) + !REF: /m/t + !REF: /m/sub/v + type(t) :: v +!$acc host_data use_device(v%c) + !DEF: /foo EXTERNAL (Subroutine) ProcEntity + !REF: /m/sub/v + !REF: /m/t/c + call foo(v%c) +!$acc end host_data + end subroutine +end module diff --git a/flang/test/Semantics/bug164303.f90 b/flang/test/Semantics/bug164303.f90 new file mode 100644 index 0000000..c356c07 --- /dev/null +++ b/flang/test/Semantics/bug164303.f90 @@ -0,0 +1,31 @@ +!RUN: %flang -fc1 -fsyntax-only %s 2>&1 | FileCheck --allow-empty %s +module foo_mod + use, intrinsic :: iso_fortran_env + use, intrinsic :: iso_c_binding + implicit none + + interface new_foo + procedure :: foo_ctor + end interface + +contains + +function foo_ctor(options) result(retval) + implicit none + integer, intent(in) :: options + integer :: retval + + interface +!CHECK-NOT: error: + subroutine new_foo(f, opt) bind(c, name='new_foo') + import + implicit none + integer, intent(inout) :: f + integer(c_int), intent(in) :: opt + end subroutine + end interface + + call new_foo(retval, options) +end function + +end module diff --git a/flang/test/Transforms/DoConcurrent/map_shape_info.f90 b/flang/test/Transforms/DoConcurrent/map_shape_info.f90 index 3dca134..40f66c1 100644 --- a/flang/test/Transforms/DoConcurrent/map_shape_info.f90 +++ b/flang/test/Transforms/DoConcurrent/map_shape_info.f90 @@ -30,12 +30,12 @@ end program do_concurrent_shape ! CHECK: %[[DIM0_EXT_MAP:.*]] = omp.map.info ! CHECK-SAME: var_ptr(%[[DIM0_EXT]] : !fir.ref<index>, index) -! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! CHECK-SAME: map_clauses(implicit) ! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QFEa.extent.dim0"} ! CHECK: %[[DIM1_EXT_MAP:.*]] = omp.map.info ! CHECK-SAME: var_ptr(%[[DIM1_EXT]] : !fir.ref<index>, index) -! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! CHECK-SAME: map_clauses(implicit) ! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QFEa.extent.dim1"} ! CHECK: omp.target host_eval({{.*}}) map_entries( @@ -79,12 +79,12 @@ end subroutine do_concurrent_shape_shift ! CHECK: %[[DIM0_STRT_MAP:.*]] = omp.map.info ! CHECK-SAME: var_ptr(%[[DIM0_STRT]] : !fir.ref<index>, index) -! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! CHECK-SAME: map_clauses(implicit) ! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QF{{.*}}Ea.start_idx.dim0"} ! CHECK: %[[DIM0_EXT_MAP:.*]] = omp.map.info ! CHECK-SAME: var_ptr(%[[DIM0_EXT]] : !fir.ref<index>, index) -! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! CHECK-SAME: map_clauses(implicit) ! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QF{{.*}}Ea.extent.dim0"} ! 
CHECK: omp.target host_eval({{.*}}) map_entries( diff --git a/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 index b6b2136..af48eb4 100644 --- a/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 +++ b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 @@ -24,7 +24,7 @@ end subroutine test_non_refernece ! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index) ! CHECK: %[[DIM_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index) -! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc) +! CHECK-SAME: map_clauses(implicit) ! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = ""} diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir index 04e60ca..aef72e4 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir +++ b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir @@ -12,10 +12,10 @@ // CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(to) capture(ByRef) -> !fir.ref<index> {name = "ub"} // CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(to) capture(ByRef) -> !fir.ref<index> {name = "step"} // CHECK: %[[VAL_6:.*]] = omp.map.info var_ptr(%[[ARG3:.*]] : !fir.ref<index>, index) map_clauses(tofrom) capture(ByRef) -> !fir.ref<index> {name = "addr"} -// CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "lb"} -// CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "ub"} -// CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "step"} -// CHECK: %[[VAL_10:.*]] = omp.map.info var_ptr(%[[ARG3]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "addr"} +// CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "lb"} +// CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "ub"} +// CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "step"} +// CHECK: %[[VAL_10:.*]] = omp.map.info var_ptr(%[[ARG3]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "addr"} // CHECK: omp.target_data map_entries(%[[VAL_3]], %[[VAL_4]], %[[VAL_5]], %[[VAL_6]] : !fir.ref<index>, !fir.ref<index>, !fir.ref<index>, !fir.ref<index>) { // CHECK: %[[VAL_11:.*]] = fir.alloca index // CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<index>, index) map_clauses(from) capture(ByRef) -> !fir.ref<index> {name = "__flang_workdistribute_from"} diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir index 062eb70..25f0350 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir +++ 
b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir @@ -12,10 +12,10 @@ // CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(to) capture(ByRef) -> !fir.ref<index> {name = "ub"} // CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(to) capture(ByRef) -> !fir.ref<index> {name = "step"} // CHECK: %[[VAL_6:.*]] = omp.map.info var_ptr(%[[ARG3:.*]] : !fir.ref<index>, index) map_clauses(tofrom) capture(ByRef) -> !fir.ref<index> {name = "addr"} -// CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "lb"} -// CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "ub"} -// CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "step"} -// CHECK: %[[VAL_10:.*]] = omp.map.info var_ptr(%[[ARG3]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "addr"} +// CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "lb"} +// CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "ub"} +// CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "step"} +// CHECK: %[[VAL_10:.*]] = omp.map.info var_ptr(%[[ARG3]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "addr"} // CHECK: omp.target_data map_entries(%[[VAL_3]], %[[VAL_4]], %[[VAL_5]], %[[VAL_6]] : !fir.ref<index>, !fir.ref<index>, !fir.ref<index>, !fir.ref<index>) { // CHECK: %[[VAL_11:.*]] = fir.alloca index // CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<index>, index) map_clauses(from) capture(ByRef) -> !fir.ref<index> {name = "__flang_workdistribute_from"} diff --git a/flang/test/Transforms/omp-map-info-finalization.fir b/flang/test/Transforms/omp-map-info-finalization.fir index 7bc0ae4..b30a2fc 100644 --- a/flang/test/Transforms/omp-map-info-finalization.fir +++ b/flang/test/Transforms/omp-map-info-finalization.fir @@ -257,7 +257,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi %1 = omp.map.bounds lower_bound(%c1_15 : index) upper_bound(%c1_15 : index) extent(%c1_15 : index) stride(%c1_15 : index) start_idx(%c1_15 : index) {stride_in_bytes = true} %2 = fir.coordinate_of %0#0, vertexes : (!fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>> %3 = omp.map.bounds lower_bound(%c1_15 : index) upper_bound(%c1_15 : index) extent(%c1_15 : index) stride(%c1_15 : index) start_idx(%c1_15 : index) {stride_in_bytes = true} - %4 = omp.map.info 
var_ptr(%2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>, !fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%3) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>> {name = "alloca_dtype%vertexes(2_8)%vertexy"} + %4 = omp.map.info var_ptr(%2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>, !fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) map_clauses(storage) capture(ByRef) bounds(%3) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>> {name = "alloca_dtype%vertexes(2_8)%vertexy"} %5 = fir.load %2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>> %c2_i64 = arith.constant 2 : i64 %c1_20 = arith.constant 1 : index @@ -266,7 +266,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi %8 = fir.coordinate_of %5, %7 : (!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, index) -> !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> %9 = fir.coordinate_of %8, vertexy : (!fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> %10 = omp.map.info var_ptr(%9 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%1) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "alloca_dtype%vertexes(2_8)%vertexy"} - %11 = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>>, !fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>) map_clauses(exit_release_or_enter_alloc) 
capture(ByRef) members(%4, %10 : [1], [1,2] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>> {name = "alloca_dtype", partial_map = true} + %11 = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>>, !fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>) map_clauses(storage) capture(ByRef) members(%4, %10 : [1], [1,2] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>> {name = "alloca_dtype", partial_map = true} omp.target map_entries(%11 -> %arg1 : !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>>) { omp.terminator } @@ -277,7 +277,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi // CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ARG0]] {{.*}} : (!fir.ref<!fir.type<[[REC_TY]]>>) -> (!fir.ref<!fir.type<[[REC_TY]]>>, !fir.ref<!fir.type<[[REC_TY]]>>) // CHECK: %[[DESC_1:.*]] = fir.coordinate_of %[[DECLARE]]#0, vertexes : (!fir.ref<!fir.type<[[REC_TY]]>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2:_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<\?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<\?xi32>>>}]]>>>>> // CHECK: %[[BASE_ADDR_1:.*]] = fir.box_offset %[[DESC_1]] base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>> -// CHECK: %[[BASE_ADDR_MAP_1:.*]] = omp.map.info var_ptr(%[[DESC_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.type<[[REC_TY2]]>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR_1]] : !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>) bounds(%{{.*}}) -> 
!fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>> {{.*}} +// CHECK: %[[BASE_ADDR_MAP_1:.*]] = omp.map.info var_ptr(%[[DESC_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.type<[[REC_TY2]]>) map_clauses(storage) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR_1]] : !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>> {{.*}} // CHECK: %[[DESC_MAP_1:.*]] = omp.map.info var_ptr(%[[DESC_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>) map_clauses(to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>> {{.*}} // CHECK: %[[DESC_LD_1:.*]] = fir.load %[[DESC_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>> // CHECK: %[[MEMBER_ACCESS_1:.*]] = fir.coordinate_of %[[DESC_LD_1]], %{{.*}} : (!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, index) -> !fir.ref<!fir.type<[[REC_TY2]]>> @@ -285,7 +285,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi // CHECK: %[[BASE_ADDR_2:.*]] = fir.box_offset %[[DESC_2]] base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> // CHECK: %[[BASE_ADDR_MAP_2:.*]] = omp.map.info var_ptr(%[[DESC_2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR_2]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {{.*}} // CHECK: %[[DESC_MAP_2:.*]] = omp.map.info var_ptr(%[[DESC_2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {{.*}} -// CHECK: %[[TOP_PARENT_MAP:.*]] = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.type<[[REC_TY]]>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) members(%6, %5, %14, %13 : [1], [1, 0], [1, 0, 2], [1, 0, 2, 0] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.type<[[REC_TY]]>> {{{.*}} partial_map = true} +// CHECK: %[[TOP_PARENT_MAP:.*]] = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.type<[[REC_TY]]>) map_clauses(storage) capture(ByRef) members(%6, %5, %14, %13 : [1], [1, 0], [1, 0, 2], [1, 0, 2, 0] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.type<[[REC_TY]]>> {{{.*}} partial_map = true} // CHECK: omp.target map_entries(%[[TOP_PARENT_MAP]] -> %{{.*}}, %[[DESC_MAP_1]] -> %{{.*}}, %[[BASE_ADDR_MAP_1]] -> %{{.*}}, %[[DESC_MAP_2]] -> %{{.*}}, %[[BASE_ADDR_MAP_2]] -> %{{.*}} : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) { // ----- diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 8bf6c44..714120a 100644 --- 
a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -945,6 +945,7 @@ if(LLVM_LIBC_FULL_BUILD) # arpa/inet.h entrypoints libc.src.arpa.inet.htonl libc.src.arpa.inet.htons + libc.src.arpa.inet.inet_aton libc.src.arpa.inet.ntohl libc.src.arpa.inet.ntohs diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index dffccba..f6bbb34 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -1077,6 +1077,7 @@ if(LLVM_LIBC_FULL_BUILD) # arpa/inet.h entrypoints libc.src.arpa.inet.htonl libc.src.arpa.inet.htons + libc.src.arpa.inet.inet_aton libc.src.arpa.inet.ntohl libc.src.arpa.inet.ntohs diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index b4ab073..7a8d74a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1113,6 +1113,7 @@ if(LLVM_LIBC_FULL_BUILD) # arpa/inet.h entrypoints libc.src.arpa.inet.htonl libc.src.arpa.inet.htons + libc.src.arpa.inet.inet_aton libc.src.arpa.inet.ntohl libc.src.arpa.inet.ntohs @@ -1373,6 +1374,11 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.wchar.wcstombs libc.src.wchar.wcsrtombs libc.src.wchar.wcsnrtombs + + # nl_types.h entrypoints + libc.src.nl_types.catopen + libc.src.nl_types.catclose + libc.src.nl_types.catgets ) endif() diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt index 0573851..d0f62eb 100644 --- a/libc/config/linux/x86_64/headers.txt +++ b/libc/config/linux/x86_64/headers.txt @@ -19,6 +19,7 @@ set(TARGET_PUBLIC_HEADERS libc.include.malloc libc.include.math libc.include.netinet_in + libc.include.nl_types libc.include.poll libc.include.pthread libc.include.sched diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst index aeeaf17..4f8ac22 100644 --- a/libc/docs/dev/undefined_behavior.rst +++ b/libc/docs/dev/undefined_behavior.rst @@ -156,3 +156,10 @@ parsed as normal. For l64a it's unspecified what happens if the input value is negative. For LLVM-libc, all inputs to l64a are treated as unsigned 32 bit ints. Additionally, the return of l64a is in a thread-local buffer that's overwritten on each call. + +`inet_aton` and Non-Standard Binary Integers +-------------------------------------------- +The current implementation of the `inet_aton` function uses the same code +as `strtol` to parse the IPv4 numbers-and-dots notation. This approach may +permit the use of binary integers (prefixed with 0b), which is not supported +by the standard. 
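As a concrete sketch of the divergence documented above (illustrative only; the component string and driver below are hypothetical, not part of this patch): on common implementations such as glibc, strtoul in base 0 auto-detects only octal ("0...") and hexadecimal ("0x...") prefixes, so it stops at the 'b' of a binary-prefixed component, while a strtol-style parser that also recognizes "0b" consumes the whole component, which is how an inet_aton built on it can accept non-standard addresses.

#include <cstdio>
#include <cstdlib>

int main() {
  // One component of a numbers-and-dots address, written with a binary prefix.
  const char *part = "0b1010";
  char *end = nullptr;
  // strtoul with base 0 (glibc behavior): consumes the leading "0" as an
  // octal prefix and leaves `end` pointing at 'b', yielding value 0.
  unsigned long value = std::strtoul(part, &end, 0);
  std::printf("value=%lu, consumed=%td of 6 chars\n", value, end - part);
  // A parser that additionally accepts "0b" (the behavior the note above
  // documents) would instead yield 10 and consume the whole component, so
  // an input such as "0b1010.0.0.1" could be treated as a valid address.
  return 0;
}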
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index a5c1878..09f169b 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -771,6 +771,14 @@ add_header_macro( .llvm-libc-macros.poll-macros ) +add_header_macro( + nl_types + ../libc/include/nl_types.yaml + nl_types.h + DEPENDS + .llvm-libc-types.nl_catd + ) + # UEFI spec references "Uefi.h" so we use that name for compatibility add_header_macro( uefi diff --git a/libc/include/arpa/inet.yaml b/libc/include/arpa/inet.yaml index 10cd56d..6e06290 100644 --- a/libc/include/arpa/inet.yaml +++ b/libc/include/arpa/inet.yaml @@ -1,7 +1,8 @@ header: arpa/inet.h header_template: inet.h.def macros: [] -types: [] +types: + - type_name: in_addr enums: [] objects: [] functions: @@ -17,6 +18,13 @@ functions: return_type: uint16_t arguments: - type: uint16_t + - name: inet_aton + standards: + - llvm_libc_ext + return_type: int + arguments: + - type: const char * + - type: in_addr * - name: ntohl standards: - POSIX diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index 5f506c4..a428a0e 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -46,6 +46,7 @@ add_header(mbstate_t HDR mbstate_t.h) add_header(mode_t HDR mode_t.h) add_header(mtx_t HDR mtx_t.h DEPENDS .__futex_word .__mutex_type) add_header(nfds_t HDR nfds_t.h) +add_header(nl_catd HDR nl_catd.h) add_header(nlink_t HDR nlink_t.h) add_header(off_t HDR off_t.h) add_header(once_flag HDR once_flag.h DEPENDS .__futex_word) diff --git a/libc/include/llvm-libc-types/nl_catd.h b/libc/include/llvm-libc-types/nl_catd.h new file mode 100644 index 0000000..ccdb020 --- /dev/null +++ b/libc/include/llvm-libc-types/nl_catd.h @@ -0,0 +1,14 @@ +//===-- Definition of nl_catd type ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_NL_CATD_H +#define LLVM_LIBC_TYPES_NL_CATD_H + +typedef void *nl_catd; + +#endif // LLVM_LIBC_TYPES_NL_CATD_H diff --git a/libc/include/nl_types.yaml b/libc/include/nl_types.yaml new file mode 100644 index 0000000..aecbb44 --- /dev/null +++ b/libc/include/nl_types.yaml @@ -0,0 +1,31 @@ +header: nl_types.h +standards: + - posix +macros: [] +types: + - type_name: nl_catd +enums: [] +objects: [] +functions: + - name: catopen + standards: + - posix + return_type: nl_catd + arguments: + - type: const char * + - type: int + - name: catclose + standards: + - posix + return_type: int + arguments: + - type: nl_catd + - name: catgets + standards: + - posix + return_type: char * + arguments: + - type: nl_catd + - type: int + - type: int + - type: const char* diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt index d7a1e1f..b2afe0a 100644 --- a/libc/src/CMakeLists.txt +++ b/libc/src/CMakeLists.txt @@ -37,6 +37,7 @@ add_subdirectory(arpa) add_subdirectory(assert) add_subdirectory(compiler) add_subdirectory(locale) +add_subdirectory(nl_types) add_subdirectory(search) add_subdirectory(setjmp) add_subdirectory(signal) diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h index 9affced..3913f7a 100644 --- a/libc/src/__support/FPUtil/double_double.h +++ b/libc/src/__support/FPUtil/double_double.h @@ -144,8 +144,9 @@ LIBC_INLINE NumberPair<T> exact_mult(T a, T b) { return r; } -LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) { - DoubleDouble r = exact_mult(a, b.hi); +template <typename T = double> +LIBC_INLINE NumberPair<T> quick_mult(T a, const NumberPair<T> &b) { + NumberPair<T> r = exact_mult(a, b.hi); r.lo = multiply_add(a, b.lo, r.lo); return r; } diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 47bb328..6209000 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -926,6 +926,7 @@ add_header_library( sincosf_utils HDRS sincosf_utils.h + sincosf_float_eval.h DEPENDS .range_reduction libc.src.__support.FPUtil.fp_bits diff --git a/libc/src/__support/math/cosf.h b/libc/src/__support/math/cosf.h index 074be0b..48ba71a 100644 --- a/libc/src/__support/math/cosf.h +++ b/libc/src/__support/math/cosf.h @@ -9,7 +9,6 @@ #ifndef LIBC_SRC___SUPPORT_MATH_COSF_H #define LIBC_SRC___SUPPORT_MATH_COSF_H -#include "sincosf_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/except_value_utils.h" @@ -18,6 +17,26 @@ #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \ + defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT) && \ + defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) + +#include "sincosf_float_eval.h" + +namespace LIBC_NAMESPACE_DECL { +namespace math { + +LIBC_INLINE static constexpr float cosf(float x) { + return sincosf_float_eval::sincosf_eval</*IS_SIN*/ false>(x); +} + +} // namespace math +} // namespace LIBC_NAMESPACE_DECL + +#else // !LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT + +#include "sincosf_utils.h" + namespace LIBC_NAMESPACE_DECL { namespace math { @@ -51,7 +70,6 @@ LIBC_INLINE static constexpr float cosf(float x) { xbits.set_sign(Sign::POS); uint32_t x_abs 
= xbits.uintval(); - double xd = static_cast<double>(xbits.get_val()); // Range reduction: // For |x| > pi/16, we perform range reduction as follows: @@ -90,6 +108,7 @@ LIBC_INLINE static constexpr float cosf(float x) { // computed using degree-7 and degree-6 minimax polynomials generated by // Sollya respectively. +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS // |x| < 0x1.0p-12f if (LIBC_UNLIKELY(x_abs < 0x3980'0000U)) { // When |x| < 2^-12, the relative error of the approximation cos(x) ~ 1 @@ -108,12 +127,12 @@ LIBC_INLINE static constexpr float cosf(float x) { // emulated version of FMA. #if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) return fputil::multiply_add(xbits.get_val(), -0x1.0p-25f, 1.0f); -#else +#else // !LIBC_TARGET_CPU_HAS_FMA_FLOAT + double xd = static_cast<double>(xbits.get_val()); return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, 1.0)); #endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT } -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS if (auto r = COSF_EXCEPTS.lookup(x_abs); LIBC_UNLIKELY(r.has_value())) return r.value(); #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS @@ -132,6 +151,7 @@ LIBC_INLINE static constexpr float cosf(float x) { return x + FPBits::quiet_nan().get_val(); } + double xd = static_cast<double>(xbits.get_val()); // Combine the results with the sine of sum formula: // cos(x) = cos((k + y)*pi/32) // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32) @@ -150,3 +170,5 @@ } // namespace LIBC_NAMESPACE_DECL #endif // LIBC_SRC___SUPPORT_MATH_COSF_H + +#endif // LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT diff --git a/libc/src/__support/math/sincosf_float_eval.h b/libc/src/__support/math/sincosf_float_eval.h new file mode 100644 index 0000000..836e928 --- /dev/null +++ b/libc/src/__support/math/sincosf_float_eval.h @@ -0,0 +1,223 @@ +//===-- Compute sin + cos for small angles ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_FLOAT_EVAL_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_FLOAT_EVAL_H + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +namespace sincosf_float_eval { + +// Since the worst case of `x mod pi` in single precision is > 2^-28, in order +// to be bounded by 1 ULP, the range reduction accuracy will need to be at +// least 2^(-28 - 23) = 2^-51. +// For fast small range reduction, we will compute as follows: +// Let pi ~ c0 + c1 + c2 +// with |c1| < ulp(c0)/2 and |c2| < ulp(c1)/2 +// then: +// k := nearest_int(x * 1/pi); +// u = (x - k * c0) - k * c1 - k * c2 +// We require k * c0 and k * c1 to be exactly representable in single precision. +// Let p_k be the precision of k; then the precision of c0 and c1 is: +// 24 - p_k, +// and the ulp of (k * c2) is 2^(-3 * (24 - p_k)). +// This gives us the following bound on the precision of k: +// 3 * (24 - p_k) >= 51, +// or equivalently: +// p_k <= 7. +// We set the bound for p_k to be 6 so that we can have some more wiggle room +// for computations. 
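+// For instance, at the chosen bound p_k = 6, c0 and c1 each keep
+// 24 - 6 = 18 significand bits, so the products k * c0 and k * c1 are exact
+// in single precision, and the ulp of k * c2 is 2^(-3 * 18) = 2^-54,
+// comfortably below the 2^-51 accuracy target derived above.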
+LIBC_INLINE static unsigned sincosf_range_reduction_small(float x, float &u) { + // > display=hexadecimal; + // > a = round(pi/8, 18, RN); + // > b = round(pi/8 - a, 18, RN); + // > c = round(pi/8 - a - b, SG, RN); + // > round(8/pi, SG, RN); + constexpr float MPI[3] = {-0x1.921f8p-2f, -0x1.aa22p-21f, -0x1.68c234p-41f}; + constexpr float ONE_OVER_PI = 0x1.45f306p+1f; + float prod_hi = x * ONE_OVER_PI; + float k = fputil::nearest_integer(prod_hi); + + float y_hi = fputil::multiply_add(k, MPI[0], x); // Exact + u = fputil::multiply_add(k, MPI[1], y_hi); + u = fputil::multiply_add(k, MPI[2], u); + return static_cast<unsigned>(static_cast<int>(k)); +} + +// TODO: Add non-FMA version of large range reduction. +LIBC_INLINE static unsigned sincosf_range_reduction_large(float x, float &u) { + // > for i from 0 to 13 do { + // if i < 2 then { pi_inv = 0.25 + 2^(8*(i - 2)) / pi; } + // else { pi_inv = 2^(8*(i-2)) / pi; }; + // pn = nearestint(pi_inv); + // pi_frac = pi_inv - pn; + // a = round(pi_frac, SG, RN); + // b = round(pi_frac - a, SG, RN); + // c = round(pi_frac - a - b, SG, RN); + // d = round(pi_frac - a - b - c, SG, RN); + // print("{", 2^3 * a, ",", 2^3 * b, ",", 2^3 * c, ",", 2^3 * d, "},"); + // }; + constexpr float EIGHT_OVER_PI[14][4] = { + {0x1.000146p1f, -0x1.9f246cp-28f, -0x1.bbead6p-54f, -0x1.ec5418p-85f}, + {0x1.0145f4p1f, -0x1.f246c6p-24f, -0x1.df56bp-49f, -0x1.ec5418p-77f}, + {0x1.45f306p1f, 0x1.b9391p-24f, 0x1.529fc2p-50f, 0x1.d5f47ep-76f}, + {0x1.f306dcp1f, 0x1.391054p-24f, 0x1.4fe13ap-49f, 0x1.7d1f54p-74f}, + {-0x1.f246c6p0f, -0x1.df56bp-25f, -0x1.ec5418p-53f, 0x1.f534dep-78f}, + {-0x1.236378p1f, 0x1.529fc2p-26f, 0x1.d5f47ep-52f, -0x1.65912p-77f}, + {0x1.391054p0f, 0x1.4fe13ap-25f, 0x1.7d1f54p-50f, -0x1.6447e4p-75f}, + {0x1.1054a8p0f, -0x1.ec5418p-29f, 0x1.f534dep-54f, -0x1.f924ecp-81f}, + {0x1.529fc2p-2f, 0x1.d5f47ep-28f, -0x1.65912p-53f, 0x1.b6c52cp-79f}, + {-0x1.ac07b2p1f, 0x1.5f47d4p-24f, 0x1.a6ee06p-49f, 0x1.b6295ap-74f}, + {-0x1.ec5418p-5f, 0x1.f534dep-30f, -0x1.f924ecp-57f, 0x1.5993c4p-82f}, + {0x1.3abe9p-1f, -0x1.596448p-27f, 0x1.b6c52cp-55f, -0x1.9b0ef2p-80f}, + {-0x1.505c16p1f, 0x1.a6ee06p-25f, 0x1.b6295ap-50f, -0x1.b0ef1cp-76f}, + {-0x1.70565ap-1f, 0x1.dc0db6p-26f, 0x1.4acc9ep-53f, 0x1.0e4108p-80f}, + }; + + using FPBits = typename fputil::FPBits<float>; + using fputil::FloatFloat; + FPBits xbits(x); + + int x_e_m32 = xbits.get_biased_exponent() - (FPBits::EXP_BIAS + 32); + unsigned idx = static_cast<unsigned>((x_e_m32 >> 3) + 2); + // Scale x down by 2^(-(8 * (idx - 2)) + xbits.set_biased_exponent((x_e_m32 & 7) + FPBits::EXP_BIAS + 32); + // 2^32 <= |x_reduced| < 2^(32 + 8) = 2^40 + float x_reduced = xbits.get_val(); + // x * c_hi = ph.hi + ph.lo exactly. + FloatFloat ph = fputil::exact_mult<float>(x_reduced, EIGHT_OVER_PI[idx][0]); + // x * c_mid = pm.hi + pm.lo exactly. + FloatFloat pm = fputil::exact_mult<float>(x_reduced, EIGHT_OVER_PI[idx][1]); + // x * c_lo = pl.hi + pl.lo exactly. + FloatFloat pl = fputil::exact_mult<float>(x_reduced, EIGHT_OVER_PI[idx][2]); + // Extract integral parts and fractional parts of (ph.lo + pm.hi). 
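+  // (ph.hi can be dropped below: |x_reduced| >= 2^32 and the table entries
+  // are > 2^-5 in magnitude, so |ph.hi| >= 2^27 and its ulp is at least
+  // 2^4 = 16; ph.hi is therefore an exact multiple of 16 and cannot change
+  // either k mod 16 or the fractional part.)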
+ float sum_hi = ph.lo + pm.hi; + float k = fputil::nearest_integer(sum_hi); + + // x * 8/pi mod 1 ~ y_hi + y_mid + y_lo + float y_hi = (ph.lo - k) + pm.hi; // Exact + FloatFloat y_mid = fputil::exact_add(pm.lo, pl.hi); + float y_lo = pl.lo; + + // y_l = x * c_lo_2 + pl.lo + float y_l = fputil::multiply_add(x_reduced, EIGHT_OVER_PI[idx][3], y_lo); + FloatFloat y = fputil::exact_add(y_hi, y_mid.hi); + y.lo += (y_mid.lo + y_l); + + // Digits of pi/8, generated by Sollya with: + // > a = round(pi/8, SG, RN); + // > b = round(pi/8 - a, SG, RN); + constexpr FloatFloat PI_OVER_8 = {-0x1.777a5cp-27f, 0x1.921fb6p-2f}; + + // Error bound: with {a} denoting the fractional part of a, i.e.: + // {a} = a - round(a) + // Then, + // | {x * 8/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-47 + // | {x mod pi/8} - (u.hi + u.lo) | < 2 * 2^-5 * 2^-47 = 2^-51 + u = fputil::multiply_add(y.hi, PI_OVER_8.hi, y.lo * PI_OVER_8.hi); + + return static_cast<unsigned>(static_cast<int>(k)); +} + +template <bool IS_SIN> LIBC_INLINE static float sincosf_eval(float x) { + // sin(k * pi/8) for k = 0..15, generated by Sollya with: + // > for k from 0 to 15 do { + // print(round(sin(k * pi/8), SG, RN)); + // }; + constexpr float SIN_K_PI_OVER_8[16] = { + 0.0f, 0x1.87de2ap-2f, 0x1.6a09e6p-1f, 0x1.d906bcp-1f, + 1.0f, 0x1.d906bcp-1f, 0x1.6a09e6p-1f, 0x1.87de2ap-2f, + 0.0f, -0x1.87de2ap-2f, -0x1.6a09e6p-1f, -0x1.d906bcp-1f, + -1.0f, -0x1.d906bcp-1f, -0x1.6a09e6p-1f, -0x1.87de2ap-2f, + }; + + using FPBits = fputil::FPBits<float>; + FPBits xbits(x); + uint32_t x_abs = cpp::bit_cast<uint32_t>(x) & 0x7fff'ffffU; + + float y; + unsigned k = 0; + if (x_abs < 0x4880'0000U) { + k = sincosf_range_reduction_small(x, y); + } else { + + if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + if (x_abs == 0x7f80'0000U) { + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + } + return x + FPBits::quiet_nan().get_val(); + } + + k = sincosf_range_reduction_large(x, y); + } + + float sin_k = SIN_K_PI_OVER_8[k & 15]; + // cos(k * pi/8) = sin(k * pi/8 + pi/2) = sin((k + 4) * pi/8). + // cos_k = cos(k * pi/8) + float cos_k = SIN_K_PI_OVER_8[(k + 4) & 15]; + + float y_sq = y * y; + + // Polynomial approximation of sin(y) and cos(y) for |y| <= pi/16: + // + // Using Taylor polynomial for sin(y): + // sin(y) ~ y - y^3 / 6 + y^5 / 120 + // Using minimax polynomial generated by Sollya for cos(y) with: + // > Q = fpminimax(cos(x), [|0, 2, 4|], [|1, SG...|], [0, pi/16]); + // + // Error bounds: + // * For sin(y) + // > P = x - SG(1/6)*x^3 + SG(1/120) * x^5; + // > dirtyinfnorm((sin(x) - P)/sin(x), [-pi/16, pi/16]); + // 0x1.825...p-27 + // * For cos(y) + // > Q = fpminimax(cos(x), [|0, 2, 4|], [|1, SG...|], [0, pi/16]); + // > dirtyinfnorm((cos(x) - Q)/cos(x), [-pi/16, pi/16]); + // 0x1.aa8...p-29 + + // p1 = y^2 * 1/120 - 1/6 + float p1 = fputil::multiply_add(y_sq, 0x1.111112p-7f, -0x1.555556p-3f); + // q1 = y^2 * coeff(Q, 4) + coeff(Q, 2) + float q1 = fputil::multiply_add(y_sq, 0x1.54b8bep-5f, -0x1.ffffc4p-2f); + float y3 = y_sq * y; + // c1 ~ cos(y) + float c1 = fputil::multiply_add(y_sq, q1, 1.0f); + // s1 ~ sin(y) + float s1 = fputil::multiply_add(y3, p1, y); + + if constexpr (IS_SIN) { + // sin(x) = cos(k * pi/8) * sin(y) + sin(k * pi/8) * cos(y). + return fputil::multiply_add(cos_k, s1, sin_k * c1); + } else { + // cos(x) = cos(k * pi/8) * cos(y) - sin(k * pi/8) * sin(y). 
+ return fputil::multiply_add(cos_k, c1, -sin_k * s1); + } +} + +} // namespace sincosf_float_eval + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_FLOAT_EVAL_H diff --git a/libc/src/arpa/inet/CMakeLists.txt b/libc/src/arpa/inet/CMakeLists.txt index 1f39a07..bb43e24 100644 --- a/libc/src/arpa/inet/CMakeLists.txt +++ b/libc/src/arpa/inet/CMakeLists.txt @@ -23,6 +23,19 @@ add_entrypoint_object( ) add_entrypoint_object( + inet_aton + SRCS + inet_aton.cpp + HDRS + inet_aton.h + DEPENDS + libc.include.arpa_inet + libc.include.llvm-libc-types.in_addr + libc.src.__support.common + libc.src.__support.str_to_integer +) + +add_entrypoint_object( ntohl SRCS ntohl.cpp diff --git a/libc/src/arpa/inet/inet_aton.cpp b/libc/src/arpa/inet/inet_aton.cpp new file mode 100644 index 0000000..71419cb --- /dev/null +++ b/libc/src/arpa/inet/inet_aton.cpp @@ -0,0 +1,57 @@ +//===-- Implementation of inet_aton function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/arpa/inet/inet_aton.h" +#include "src/__support/common.h" +#include "src/__support/endian_internal.h" +#include "src/__support/str_to_integer.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, inet_aton, (const char *cp, in_addr *inp)) { + constexpr int IPV4_MAX_DOT_NUM = 3; + unsigned long parts[IPV4_MAX_DOT_NUM + 1] = {0}; + int dot_num = 0; + + for (; dot_num <= IPV4_MAX_DOT_NUM; ++dot_num) { + auto result = internal::strtointeger<unsigned long>(cp, 0); + parts[dot_num] = result; + + if (result.has_error() || result.parsed_len == 0) + return 0; + char next_char = *(cp + result.parsed_len); + if (next_char != '.' && next_char != '\0') + return 0; + else if (next_char == '\0') + break; + else + cp += (result.parsed_len + 1); + } + + if (dot_num > IPV4_MAX_DOT_NUM) + return 0; + + // converts the Internet host address cp from the IPv4 numbers-and-dots + // notation (a[.b[.c[.d]]]) into binary form (in network byte order) + unsigned long result = 0; + for (int i = 0; i <= dot_num; ++i) { + unsigned long max_part = + i == dot_num ? (0xffffffffUL >> (8 * dot_num)) : 0xffUL; + if (parts[i] > max_part) + return 0; + int shift = i == dot_num ? 0 : 8 * (IPV4_MAX_DOT_NUM - i); + result |= parts[i] << shift; + } + + if (inp) + inp->s_addr = Endian::to_big_endian(static_cast<uint32_t>(result)); + + return 1; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/arpa/inet/inet_aton.h b/libc/src/arpa/inet/inet_aton.h new file mode 100644 index 0000000..ea387d1 --- /dev/null +++ b/libc/src/arpa/inet/inet_aton.h @@ -0,0 +1,21 @@ +//===-- Implementation header of inet_aton ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H +#define LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H + +#include "include/llvm-libc-types/in_addr.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int inet_aton(const char *cp, in_addr *inp); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp index a8e634c..c362628 100644 --- a/libc/src/math/generic/sinf.cpp +++ b/libc/src/math/generic/sinf.cpp @@ -17,13 +17,30 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA + +#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \ + defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT) && \ + defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) + +#include "src/__support/math/sincosf_float_eval.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(float, sinf, (float x)) { + return math::sincosf_float_eval::sincosf_eval</*IS_SIN*/ true>(x); +} + +} // namespace LIBC_NAMESPACE_DECL + +#else // !LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT + #include "src/__support/math/sincosf_utils.h" -#if defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE) +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE #include "src/__support/math/range_reduction_fma.h" -#else +#else // !LIBC_TARGET_CPU_HAS_FMA_DOUBLE #include "src/__support/math/range_reduction.h" -#endif +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE namespace LIBC_NAMESPACE_DECL { @@ -162,3 +179,4 @@ LLVM_LIBC_FUNCTION(float, sinf, (float x)) { } } // namespace LIBC_NAMESPACE_DECL +#endif // LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT diff --git a/libc/src/nl_types/CMakeLists.txt b/libc/src/nl_types/CMakeLists.txt new file mode 100644 index 0000000..9783e0e --- /dev/null +++ b/libc/src/nl_types/CMakeLists.txt @@ -0,0 +1,31 @@ +add_entrypoint_object( + catopen + SRCS + catopen.cpp + HDRS + catopen.h + DEPENDS + libc.include.llvm-libc-types.nl_catd + libc.src.errno.errno +) + +add_entrypoint_object( + catclose + SRCS + catclose.cpp + HDRS + catclose.h + DEPENDS + libc.include.llvm-libc-types.nl_catd +) + +add_entrypoint_object( + catgets + SRCS + catgets.cpp + HDRS + catgets.h + DEPENDS + libc.include.llvm-libc-types.nl_catd +) + diff --git a/libc/src/nl_types/catclose.cpp b/libc/src/nl_types/catclose.cpp new file mode 100644 index 0000000..1f87900d --- /dev/null +++ b/libc/src/nl_types/catclose.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of catclose ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/nl_types/catclose.h" +#include "include/llvm-libc-types/nl_catd.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, catclose, ([[maybe_unused]] nl_catd catalog)) { + // TODO: Add implementation for message catalogs. For now, return error + // regardless of input. 
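+  // POSIX has catclose() return 0 on success and -1 on failure; a stub with
+  // no catalogs to close can only report failure.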
+ return -1; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/nl_types/catclose.h b/libc/src/nl_types/catclose.h new file mode 100644 index 0000000..433020a --- /dev/null +++ b/libc/src/nl_types/catclose.h @@ -0,0 +1,21 @@ +//===-- Implementation header for catclose ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_NL_TYPES_CATCLOSE_H +#define LLVM_LIBC_SRC_NL_TYPES_CATCLOSE_H + +#include "include/llvm-libc-types/nl_catd.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int catclose(nl_catd catalog); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_NL_TYPES_CATCLOSE_H diff --git a/libc/src/nl_types/catgets.cpp b/libc/src/nl_types/catgets.cpp new file mode 100644 index 0000000..3768977 --- /dev/null +++ b/libc/src/nl_types/catgets.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of catgets -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/nl_types/catgets.h" +#include "include/llvm-libc-types/nl_catd.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(char *, catgets, + ([[maybe_unused]] nl_catd catalog, + [[maybe_unused]] int set_number, + [[maybe_unused]] int message_number, const char *message)) { + // TODO: Add implementation for message catalogs. For now, return backup + // message regardless of input. + return const_cast<char *>(message); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/nl_types/catgets.h b/libc/src/nl_types/catgets.h new file mode 100644 index 0000000..c909bec --- /dev/null +++ b/libc/src/nl_types/catgets.h @@ -0,0 +1,22 @@ +//===-- Implementation header for catgets -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_NL_TYPES_CATGETS_H +#define LLVM_LIBC_SRC_NL_TYPES_CATGETS_H + +#include "include/llvm-libc-types/nl_catd.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +char *catgets(nl_catd catalog, int set_number, int message_number, + const char *message); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_NL_TYPES_CATGETS_H diff --git a/libc/src/nl_types/catopen.cpp b/libc/src/nl_types/catopen.cpp new file mode 100644 index 0000000..393d760 --- /dev/null +++ b/libc/src/nl_types/catopen.cpp @@ -0,0 +1,26 @@ +//===-- Implementation of catopen -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/nl_types/catopen.h" +#include "include/llvm-libc-types/nl_catd.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(nl_catd, catopen, + ([[maybe_unused]] const char *name, + [[maybe_unused]] int flag)) { + // TODO: Add implementation for message catalogs. For now, return error + // regardless of input. + libc_errno = EINVAL; + return reinterpret_cast<nl_catd>(-1); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/nl_types/catopen.h b/libc/src/nl_types/catopen.h new file mode 100644 index 0000000..08ff71a --- /dev/null +++ b/libc/src/nl_types/catopen.h @@ -0,0 +1,21 @@ +//===-- Implementation header for catopen -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_NL_TYPES_CATOPEN_H +#define LLVM_LIBC_SRC_NL_TYPES_CATOPEN_H + +#include "include/llvm-libc-types/nl_catd.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +nl_catd catopen(const char *name, int flag); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_NL_TYPES_CATOPEN_H diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index c576e08..0c6ec9f 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -96,6 +96,7 @@ add_subdirectory(assert) add_subdirectory(compiler) add_subdirectory(dirent) add_subdirectory(locale) +add_subdirectory(nl_types) add_subdirectory(signal) add_subdirectory(spawn) diff --git a/libc/test/src/arpa/inet/CMakeLists.txt b/libc/test/src/arpa/inet/CMakeLists.txt index 21760df..690f751 100644 --- a/libc/test/src/arpa/inet/CMakeLists.txt +++ b/libc/test/src/arpa/inet/CMakeLists.txt @@ -23,6 +23,17 @@ add_libc_unittest( ) add_libc_unittest( + inet_aton + SUITE + libc_arpa_inet_unittests + SRCS + inet_aton_test.cpp + DEPENDS + libc.src.arpa.inet.htonl + libc.src.arpa.inet.inet_aton +) + +add_libc_unittest( ntohl SUITE libc_arpa_inet_unittests diff --git a/libc/test/src/arpa/inet/inet_aton_test.cpp b/libc/test/src/arpa/inet/inet_aton_test.cpp new file mode 100644 index 0000000..c9c9787 --- /dev/null +++ b/libc/test/src/arpa/inet/inet_aton_test.cpp @@ -0,0 +1,92 @@ +//===-- Unittests for inet_aton -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/arpa/inet/htonl.h" +#include "src/arpa/inet/inet_aton.h" +#include "test/UnitTest/Test.h" + +namespace LIBC_NAMESPACE_DECL { + +TEST(LlvmLibcInetAton, ValidTest) { + in_addr a; + + // a.b.c.d + a.s_addr = 0; + ASSERT_EQ(1, inet_aton("127.1.2.4", &a)); + ASSERT_EQ(htonl(0x7f010204), a.s_addr); + + // a.b.c + a.s_addr = 0; + ASSERT_EQ(1, inet_aton("127.1.4", &a)); + ASSERT_EQ(htonl(0x7f010004), a.s_addr); + + // a.b + a.s_addr = 0; + ASSERT_EQ(1, inet_aton("127.1", &a)); + ASSERT_EQ(htonl(0x7f000001), a.s_addr); + + // a + a.s_addr = 0; + ASSERT_EQ(1, inet_aton("0x7f000001", &a)); + ASSERT_EQ(htonl(0x7f000001), a.s_addr); + + // Hex (0x) and mixed-case hex digits. + a.s_addr = 0; + ASSERT_EQ(1, inet_aton("0xFf.0.0.1", &a)); + ASSERT_EQ(htonl(0xff000001), a.s_addr); + + // Hex (0X) and mixed-case hex digits. + a.s_addr = 0; + ASSERT_EQ(1, inet_aton("0XfF.0.0.1", &a)); + ASSERT_EQ(htonl(0xff000001), a.s_addr); + + // Octal. + a.s_addr = 0; + ASSERT_EQ(1, inet_aton("0177.0.0.1", &a)); + ASSERT_EQ(htonl(0x7f000001), a.s_addr); + + a.s_addr = 0; + ASSERT_EQ(1, inet_aton("036", &a)); + ASSERT_EQ(htonl(036U), a.s_addr); +} + +TEST(LlvmLibcInetAton, InvalidTest) { + ASSERT_EQ(0, inet_aton("", nullptr)); // Empty. + ASSERT_EQ(0, inet_aton("x", nullptr)); // Leading junk. + ASSERT_EQ(0, inet_aton("127.0.0.1x", nullptr)); // Trailing junk. + ASSERT_EQ(0, inet_aton("09.0.0.1", nullptr)); // Invalid octal. + ASSERT_EQ(0, inet_aton("0xg.0.0.1", nullptr)); // Invalid hex. + ASSERT_EQ(0, inet_aton("1.2.3.4.5", nullptr)); // Too many dots. + ASSERT_EQ(0, inet_aton("1.2.3.4.", nullptr)); // Trailing dot. + + // Out of range a.b.c.d form. + ASSERT_EQ(0, inet_aton("999.0.0.1", nullptr)); + ASSERT_EQ(0, inet_aton("0.999.0.1", nullptr)); + ASSERT_EQ(0, inet_aton("0.0.999.1", nullptr)); + ASSERT_EQ(0, inet_aton("0.0.0.999", nullptr)); + + // Out of range a.b.c form. + ASSERT_EQ(0, inet_aton("256.0.0", nullptr)); + ASSERT_EQ(0, inet_aton("0.256.0", nullptr)); + ASSERT_EQ(0, inet_aton("0.0.0x10000", nullptr)); + + // Out of range a.b form. + ASSERT_EQ(0, inet_aton("256.0", nullptr)); + ASSERT_EQ(0, inet_aton("0.0x1000000", nullptr)); + + // Out of range a form. + ASSERT_EQ(0, inet_aton("0x100000000", nullptr)); + + // 64-bit overflow. + ASSERT_EQ(0, inet_aton("0x10000000000000000", nullptr)); + + // Out of range octal. 
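+  // (0400 octal is 256, one past the 255 maximum for a dotted component.)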
+ ASSERT_EQ(0, inet_aton("0400.0.0.1", nullptr)); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index b3f54ab..ff5c511 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -17,6 +17,20 @@ add_fp_unittest( ) add_fp_unittest( + cosf_float_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + cosf_float_test.cpp + DEPENDS + libc.src.__support.math.sincosf_utils + libc.src.__support.FPUtil.fp_bits + FLAGS + FMA_OPT__ONLY +) + +add_fp_unittest( cos_test NEED_MPFR SUITE @@ -97,6 +111,20 @@ add_fp_unittest( ) add_fp_unittest( + sinf_float_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + sinf_float_test.cpp + DEPENDS + libc.src.__support.math.sincosf_utils + libc.src.__support.FPUtil.fp_bits + FLAGS + FMA_OPT__ONLY +) + +add_fp_unittest( sinf16_test NEED_MPFR SUITE diff --git a/libc/test/src/math/cosf_float_test.cpp b/libc/test/src/math/cosf_float_test.cpp new file mode 100644 index 0000000..3d573b2 --- /dev/null +++ b/libc/test/src/math/cosf_float_test.cpp @@ -0,0 +1,35 @@ +//===-- Unittests for cosf float-only -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/math/sincosf_float_eval.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +#include "hdr/stdint_proxy.h" + +using LlvmLibcCosfFloatTest = LIBC_NAMESPACE::testing::FPTest<float>; + +float cosf_fast(float x) { + return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval< + /*IS_SIN*/ false>(x); +} + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +TEST_F(LlvmLibcCosfFloatTest, InFloatRange) { + constexpr uint32_t COUNT = 100'000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { + float x = FPBits(v).get_val(); + if (FPBits(v).is_nan() || FPBits(v).is_inf()) + continue; + ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, cosf_fast(x), 3.5); + } +} diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt index 1583ab6..2ff4f02 100644 --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ -58,6 +58,21 @@ add_fp_unittest( ) add_fp_unittest( + sinf_float_test + NO_RUN_POSTBUILD + NEED_MPFR + SUITE + libc_math_exhaustive_tests + SRCS + sinf_float_test.cpp + LINK_LIBRARIES + -lpthread + DEPENDS + .exhaustive_test + libc.src.__support.math.sincosf_utils +) + +add_fp_unittest( sinpif_test NO_RUN_POSTBUILD NEED_MPFR @@ -90,6 +105,21 @@ add_fp_unittest( ) add_fp_unittest( + cosf_float_test + NO_RUN_POSTBUILD + NEED_MPFR + SUITE + libc_math_exhaustive_tests + SRCS + cosf_float_test.cpp + LINK_LIBRARIES + -lpthread + DEPENDS + .exhaustive_test + libc.src.__support.math.sincosf_utils +) + +add_fp_unittest( cospif_test NO_RUN_POSTBUILD NEED_MPFR diff --git a/libc/test/src/math/exhaustive/cosf_float_test.cpp b/libc/test/src/math/exhaustive/cosf_float_test.cpp new file mode 100644 index 0000000..0c3a988 --- /dev/null +++ b/libc/test/src/math/exhaustive/cosf_float_test.cpp @@ -0,0 +1,44 @@ +//===-- Exhaustive test for cosf - float-only -----------------------------===// 
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/math/sincosf_float_eval.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+float cosf_fast(float x) {
+  return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval<
+      /*IS_SIN*/ false>(x);
+}
+
+using LlvmLibcCosfExhaustiveTest =
+    LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Cos, cosf_fast,
+                                      3>;
+
+// Range: [0, Inf];
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcCosfExhaustiveTest, PositiveRange) {
+  std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+            << POS_START << ", 0x" << POS_STOP << ") --" << std::dec
+            << std::endl;
+  test_full_range(mpfr::RoundingMode::Nearest, POS_START, POS_STOP);
+}
+
+// Range: [-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcCosfExhaustiveTest, NegativeRange) {
+  std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+            << NEG_START << ", 0x" << NEG_STOP << ") --" << std::dec
+            << std::endl;
+  test_full_range(mpfr::RoundingMode::Nearest, NEG_START, NEG_STOP);
+}
diff --git a/libc/test/src/math/exhaustive/exhaustive_test.h b/libc/test/src/math/exhaustive/exhaustive_test.h
index 8be65ba..322d774 100644
--- a/libc/test/src/math/exhaustive/exhaustive_test.h
+++ b/libc/test/src/math/exhaustive/exhaustive_test.h
@@ -40,7 +40,7 @@ template <typename OutType, typename InType = OutType>
 using UnaryOp = OutType(InType);
 
 template <typename OutType, typename InType, mpfr::Operation Op,
-          UnaryOp<OutType, InType> Func>
+          UnaryOp<OutType, InType> Func, unsigned Tolerance = 0>
 struct UnaryOpChecker : public virtual LIBC_NAMESPACE::testing::Test {
   using FloatType = InType;
   using FPBits = LIBC_NAMESPACE::fputil::FPBits<FloatType>;
@@ -57,8 +57,8 @@ struct UnaryOpChecker : public virtual LIBC_NAMESPACE::testing::Test {
     do {
       FPBits xbits(bits);
       FloatType x = xbits.get_val();
-      bool correct =
-          TEST_MPFR_MATCH_ROUNDING_SILENTLY(Op, x, Func(x), 0.5, rounding);
+      bool correct = TEST_MPFR_MATCH_ROUNDING_SILENTLY(
+          Op, x, Func(x), static_cast<double>(Tolerance) + 0.5, rounding);
       failed += (!correct);
 
       // Uncomment to print out failed values.
      if (!correct) {
@@ -256,9 +256,10 @@ struct LlvmLibcExhaustiveMathTest
   }
 };
 
-template <typename FloatType, mpfr::Operation Op, UnaryOp<FloatType> Func>
-using LlvmLibcUnaryOpExhaustiveMathTest =
-    LlvmLibcExhaustiveMathTest<UnaryOpChecker<FloatType, FloatType, Op, Func>>;
+template <typename FloatType, mpfr::Operation Op, UnaryOp<FloatType> Func,
+          unsigned Tolerance = 0>
+using LlvmLibcUnaryOpExhaustiveMathTest = LlvmLibcExhaustiveMathTest<
+    UnaryOpChecker<FloatType, FloatType, Op, Func, Tolerance>>;
 
 template <typename OutType, typename InType, mpfr::Operation Op,
           UnaryOp<OutType, InType> Func>
diff --git a/libc/test/src/math/exhaustive/sinf_float_test.cpp b/libc/test/src/math/exhaustive/sinf_float_test.cpp
new file mode 100644
index 0000000..1e735e6
--- /dev/null
+++ b/libc/test/src/math/exhaustive/sinf_float_test.cpp
@@ -0,0 +1,47 @@
+//===-- Exhaustive test for sinf - float-only -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Test float-only fast math implementation for sinf.
+#define LIBC_MATH (LIBC_MATH_FAST | LIBC_MATH_INTERMEDIATE_COMP_IN_FLOAT)
+
+#include "exhaustive_test.h"
+#include "src/__support/math/sincosf_float_eval.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+float sinf_fast(float x) {
+  return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval<
+      /*IS_SIN*/ true>(x);
+}
+
+using LlvmLibcSinfExhaustiveTest =
+    LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Sin, sinf_fast,
+                                      3>;
+
+// Range: [0, Inf];
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PositiveRange) {
+  std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+            << POS_START << ", 0x" << POS_STOP << ") --" << std::dec
+            << std::endl;
+  test_full_range(mpfr::RoundingMode::Nearest, POS_START, POS_STOP);
+}
+
+// Range: [-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRange) {
+  std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+            << NEG_START << ", 0x" << NEG_STOP << ") --" << std::dec
+            << std::endl;
+  test_full_range(mpfr::RoundingMode::Nearest, NEG_START, NEG_STOP);
+}
diff --git a/libc/test/src/math/sinf_float_test.cpp b/libc/test/src/math/sinf_float_test.cpp
new file mode 100644
index 0000000..33aab96
--- /dev/null
+++ b/libc/test/src/math/sinf_float_test.cpp
@@ -0,0 +1,35 @@
+//===-- Unittests for sinf float-only -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/math/sincosf_float_eval.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +#include "hdr/stdint_proxy.h" + +using LlvmLibcSinfFloatTest = LIBC_NAMESPACE::testing::FPTest<float>; + +float sinf_fast(float x) { + return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval< + /*IS_SIN*/ true>(x); +} + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +TEST_F(LlvmLibcSinfFloatTest, InFloatRange) { + constexpr uint32_t COUNT = 100'000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { + float x = FPBits(v).get_val(); + if (FPBits(v).is_nan() || FPBits(v).is_inf()) + continue; + ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, sinf_fast(x), 3.5); + } +} diff --git a/libc/test/src/nl_types/CMakeLists.txt b/libc/test/src/nl_types/CMakeLists.txt new file mode 100644 index 0000000..4fce637 --- /dev/null +++ b/libc/test/src/nl_types/CMakeLists.txt @@ -0,0 +1,14 @@ +add_custom_target(libc-nl-types-tests) + +add_libc_test( + nl_types_test + SUITE + libc-nl-types-tests + SRCS + nl_types_test.cpp + DEPENDS + libc.include.llvm-libc-types.nl_catd + libc.src.nl_types.catopen + libc.src.nl_types.catclose + libc.src.nl_types.catgets +) diff --git a/libc/test/src/nl_types/nl_types_test.cpp b/libc/test/src/nl_types/nl_types_test.cpp new file mode 100644 index 0000000..5ae5c5a --- /dev/null +++ b/libc/test/src/nl_types/nl_types_test.cpp @@ -0,0 +1,33 @@ +//===-- Unittests for nl_types --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/llvm-libc-types/nl_catd.h" +#include "src/nl_types/catclose.h" +#include "src/nl_types/catgets.h" +#include "src/nl_types/catopen.h" +#include "test/UnitTest/ErrnoCheckingTest.h" + +using LlvmLibcNlTypesTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcNlTypesTest, CatopenFails) { + ASSERT_EQ(LIBC_NAMESPACE::catopen("/somepath", 0), + reinterpret_cast<nl_catd>(-1)); + ASSERT_ERRNO_EQ(EINVAL); +} + +TEST_F(LlvmLibcNlTypesTest, CatcloseFails) { + ASSERT_EQ(LIBC_NAMESPACE::catclose(nullptr), -1); +} + +TEST_F(LlvmLibcNlTypesTest, CatgetsFails) { + const char *message = "message"; + // Note that we test for pointer equality here, since catgets + // is expected to return the input argument as-is. 
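+  // (A content comparison could also pass for a copy of the string, which is
+  // not what the stub guarantees.)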
+  ASSERT_EQ(LIBC_NAMESPACE::catgets(nullptr, 0, 0, message),
+            const_cast<char *>(message));
+}
diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
index 5d71d2cf..1ab4c43 100644
--- a/libunwind/src/UnwindRegistersRestore.S
+++ b/libunwind/src/UnwindRegistersRestore.S
@@ -25,6 +25,8 @@
 #if !defined(__USING_SJLJ_EXCEPTIONS__)
 
 #if defined(__i386__)
+.att_syntax
+
 DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto)
 #
 # extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *);
@@ -69,6 +71,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto)
 # skip gs
 
 #elif defined(__x86_64__) && !defined(__arm64ec__)
+.att_syntax
 
 DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto)
 #
diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
index fe3ba78..31a177f 100644
--- a/libunwind/src/UnwindRegistersSave.S
+++ b/libunwind/src/UnwindRegistersSave.S
@@ -25,6 +25,7 @@
 #if !defined(__USING_SJLJ_EXCEPTIONS__)
 
 #if defined(__i386__)
+.att_syntax
 
 #
 # extern int __unw_getcontext(unw_context_t* thread_state)
@@ -109,6 +110,7 @@ DEFINE_LIBUNWIND_FUNCTION("#__unw_getcontext")
   .text
 
 #elif defined(__x86_64__)
+.att_syntax
 
 #
 # extern int __unw_getcontext(unw_context_t* thread_state)
diff --git a/lldb/docs/resources/lldbgdbremote.md b/lldb/docs/resources/lldbgdbremote.md
index 287484e..032edb6 100644
--- a/lldb/docs/resources/lldbgdbremote.md
+++ b/lldb/docs/resources/lldbgdbremote.md
@@ -2491,9 +2491,9 @@ The packet below are supported by the
 ### qWasmCallStack
 
 Get the Wasm call stack for the given thread id. This returns a hex-encoded
-list of PC values, one for each frame of the call stack. To match the Wasm
-specification, the addresses are encoded in little endian byte order, even if
-the endian of the Wasm runtime's host is not little endian.
+list (with no delimiters) of 64-bit PC values, one for each frame of the call
+stack. To match the Wasm specification, the addresses are encoded in little
+endian byte order, even if the Wasm runtime's host is not little-endian.
 
 ```
 send packet: $qWasmCallStack:202dbe040#08
diff --git a/lldb/include/lldb/Symbol/DeclVendor.h b/lldb/include/lldb/Symbol/DeclVendor.h
index 19ab2bb..5b0cbf9 100644
--- a/lldb/include/lldb/Symbol/DeclVendor.h
+++ b/lldb/include/lldb/Symbol/DeclVendor.h
@@ -20,7 +20,6 @@ namespace lldb_private {
 class DeclVendor {
 public:
   enum DeclVendorKind {
-    eClangDeclVendor,
     eClangModuleDeclVendor,
     eAppleObjCDeclVendor,
     eLastClangDeclVendor,
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index dc75d98..8f5892e 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -1571,6 +1571,28 @@ public:
   virtual size_t ReadMemory(lldb::addr_t vm_addr, void *buf, size_t size,
                             Status &error);
 
+  /// Read from multiple memory ranges and write the results into buffer.
+  /// This calls ReadMemoryFromInferior multiple times, once per range,
+  /// bypassing the read cache. Process implementations that can perform this
+  /// operation more efficiently should override this.
+  ///
+  /// \param[in] ranges
+  ///   A collection of ranges (base address + size) to read from.
+  ///
+  /// \param[out] buffer
+  ///   A buffer where the read memory will be written to. It must be at least
+  ///   as long as the sum of the sizes of each range.
+  ///
+  /// \return
+  ///   A vector of MutableArrayRef, where each MutableArrayRef is a slice of
+  ///   the input buffer into which the memory contents were copied.
+  ///   The size of the slice indicates how many bytes were read successfully.
+  ///   Partial reads are always performed from the start of the requested
+  ///   range, never from the middle or end.
+  virtual llvm::SmallVector<llvm::MutableArrayRef<uint8_t>>
+  ReadMemoryRanges(llvm::ArrayRef<Range<lldb::addr_t, size_t>> ranges,
+                   llvm::MutableArrayRef<uint8_t> buffer);
+
   /// Read of memory from a process.
   ///
   /// This function has the same semantics of ReadMemory except that it
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt
index 2aae7d1..01d588f 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt
+++ b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt
@@ -5,7 +5,6 @@ add_lldb_library(lldbPluginExpressionParserClang
   ClangASTImporter.cpp
   ClangASTMetadata.cpp
   ClangASTSource.cpp
-  ClangDeclVendor.cpp
   ClangExpressionDeclMap.cpp
   ClangExpressionHelper.cpp
   ClangExpressionParser.cpp
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
index ebe7be4..0efeb2e 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
@@ -8,7 +8,6 @@
 
 #include "ClangASTSource.h"
 
-#include "ClangDeclVendor.h"
 #include "ClangModulesDeclVendor.h"
 
 #include "lldb/Core/Module.h"
@@ -799,7 +798,7 @@ void ClangASTSource::FindDeclInModules(NameSearchContext &context,
   bool append = false;
   uint32_t max_matches = 1;
 
-  std::vector<clang::NamedDecl *> decls;
+  std::vector<CompilerDecl> decls;
 
   if (!modules_decl_vendor->FindDecls(name, append, max_matches, decls))
     return;
@@ -807,7 +806,8 @@ void ClangASTSource::FindDeclInModules(NameSearchContext &context,
   LLDB_LOG(log, "  CAS::FEVD Matching entity found for \"{0}\" in the modules",
            name);
 
-  clang::NamedDecl *const decl_from_modules = decls[0];
+  auto *const decl_from_modules =
+      llvm::cast<NamedDecl>(ClangUtil::GetDecl(decls[0]));
 
   if (llvm::isa<clang::TypeDecl>(decl_from_modules) ||
       llvm::isa<clang::ObjCContainerDecl>(decl_from_modules) ||
@@ -849,16 +849,16 @@ void ClangASTSource::FindDeclInObjCRuntime(NameSearchContext &context,
   bool append = false;
   uint32_t max_matches = 1;
 
-  std::vector<clang::NamedDecl *> decls;
+  std::vector<CompilerDecl> decls;
 
-  auto *clang_decl_vendor = llvm::cast<ClangDeclVendor>(decl_vendor);
+  auto *clang_decl_vendor = llvm::cast<DeclVendor>(decl_vendor);
   if (!clang_decl_vendor->FindDecls(name, append, max_matches, decls))
     return;
 
   LLDB_LOG(log, "  CAS::FEVD Matching type found for \"{0}\" in the runtime",
            name);
 
-  clang::Decl *copied_decl = CopyDecl(decls[0]);
+  clang::Decl *copied_decl = CopyDecl(ClangUtil::GetDecl(decls[0]));
   clang::NamedDecl *copied_named_decl =
       copied_decl ? dyn_cast<clang::NamedDecl>(copied_decl) : nullptr;
@@ -1081,14 +1081,14 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
     ConstString interface_name(interface_decl->getNameAsString().c_str());
     bool append = false;
     uint32_t max_matches = 1;
-    std::vector<clang::NamedDecl *> decls;
+    std::vector<CompilerDecl> decls;
 
     if (!modules_decl_vendor->FindDecls(interface_name, append, max_matches,
                                         decls))
       break;
 
     ObjCInterfaceDecl *interface_decl_from_modules =
-        dyn_cast<ObjCInterfaceDecl>(decls[0]);
+        dyn_cast<ObjCInterfaceDecl>(ClangUtil::GetDecl(decls[0]));
 
     if (!interface_decl_from_modules)
       break;
@@ -1121,15 +1121,15 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
     ConstString interface_name(interface_decl->getNameAsString().c_str());
     bool append = false;
     uint32_t max_matches = 1;
-    std::vector<clang::NamedDecl *> decls;
+    std::vector<CompilerDecl> decls;
 
-    auto *clang_decl_vendor = llvm::cast<ClangDeclVendor>(decl_vendor);
+    auto *clang_decl_vendor = llvm::cast<DeclVendor>(decl_vendor);
     if (!clang_decl_vendor->FindDecls(interface_name, append, max_matches,
                                       decls))
       break;
 
     ObjCInterfaceDecl *runtime_interface_decl =
-        dyn_cast<ObjCInterfaceDecl>(decls[0]);
+        dyn_cast<ObjCInterfaceDecl>(ClangUtil::GetDecl(decls[0]));
 
     if (!runtime_interface_decl)
       break;
@@ -1254,13 +1254,13 @@ void ClangASTSource::FindObjCPropertyAndIvarDecls(NameSearchContext &context) {
 
     bool append = false;
     uint32_t max_matches = 1;
-    std::vector<clang::NamedDecl *> decls;
+    std::vector<CompilerDecl> decls;
 
     if (!modules_decl_vendor->FindDecls(class_name, append, max_matches,
                                         decls))
      break;
 
     DeclFromUser<const ObjCInterfaceDecl> interface_decl_from_modules(
-        dyn_cast<ObjCInterfaceDecl>(decls[0]));
+        dyn_cast<ObjCInterfaceDecl>(ClangUtil::GetDecl(decls[0])));
 
     if (!interface_decl_from_modules.IsValid())
       break;
@@ -1297,14 +1297,14 @@ void ClangASTSource::FindObjCPropertyAndIvarDecls(NameSearchContext &context) {
 
     bool append = false;
     uint32_t max_matches = 1;
-    std::vector<clang::NamedDecl *> decls;
+    std::vector<CompilerDecl> decls;
 
-    auto *clang_decl_vendor = llvm::cast<ClangDeclVendor>(decl_vendor);
+    auto *clang_decl_vendor = llvm::cast<DeclVendor>(decl_vendor);
     if (!clang_decl_vendor->FindDecls(class_name, append, max_matches, decls))
       break;
 
     DeclFromUser<const ObjCInterfaceDecl> interface_decl_from_runtime(
-        dyn_cast<ObjCInterfaceDecl>(decls[0]));
+        dyn_cast<ObjCInterfaceDecl>(ClangUtil::GetDecl(decls[0])));
 
     if (!interface_decl_from_runtime.IsValid())
       break;
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.cpp
deleted file mode 100644
index 867d4ff..0000000
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- ClangDeclVendor.cpp -----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Plugins/ExpressionParser/Clang/ClangDeclVendor.h" -#include "Plugins/ExpressionParser/Clang/ClangUtil.h" -#include "Plugins/TypeSystem/Clang/TypeSystemClang.h" - -#include "lldb/Utility/ConstString.h" - -using namespace lldb_private; - -uint32_t ClangDeclVendor::FindDecls(ConstString name, bool append, - uint32_t max_matches, - std::vector<clang::NamedDecl *> &decls) { - if (!append) - decls.clear(); - - std::vector<CompilerDecl> compiler_decls; - uint32_t ret = FindDecls(name, /*append*/ false, max_matches, compiler_decls); - for (CompilerDecl compiler_decl : compiler_decls) { - clang::Decl *d = ClangUtil::GetDecl(compiler_decl); - clang::NamedDecl *nd = llvm::cast<clang::NamedDecl>(d); - decls.push_back(nd); - } - return ret; -} diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.h deleted file mode 100644 index a9b2d41..0000000 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.h +++ /dev/null @@ -1,43 +0,0 @@ -//===-- ClangDeclVendor.h ---------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGDECLVENDOR_H -#define LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGDECLVENDOR_H - -#include "lldb/Symbol/DeclVendor.h" - -namespace clang { -class NamedDecl; -} - -namespace lldb_private { - -// A clang specialized extension to DeclVendor. 
-class ClangDeclVendor : public DeclVendor { -public: - ClangDeclVendor(DeclVendorKind kind) : DeclVendor(kind) {} - - ~ClangDeclVendor() override = default; - - using DeclVendor::FindDecls; - - uint32_t FindDecls(ConstString name, bool append, uint32_t max_matches, - std::vector<clang::NamedDecl *> &decls); - - static bool classof(const DeclVendor *vendor) { - return vendor->GetKind() >= eClangDeclVendor && - vendor->GetKind() < eLastClangDeclVendor; - } - -private: - ClangDeclVendor(const ClangDeclVendor &) = delete; - const ClangDeclVendor &operator=(const ClangDeclVendor &) = delete; -}; -} // namespace lldb_private - -#endif diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index 833bc3b..9cb8f7a 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -1023,13 +1023,14 @@ void ClangExpressionDeclMap::LookupInModulesDeclVendor( bool append = false; uint32_t max_matches = 1; - std::vector<clang::NamedDecl *> decls; + std::vector<CompilerDecl> decls; if (!modules_decl_vendor->FindDecls(name, append, max_matches, decls)) return; assert(!decls.empty() && "FindDecls returned true but no decls?"); - clang::NamedDecl *const decl_from_modules = decls[0]; + auto *const decl_from_modules = + llvm::cast<NamedDecl>(ClangUtil::GetDecl(decls[0])); LLDB_LOG(log, " CAS::FEVD Matching decl found for " @@ -1223,7 +1224,7 @@ bool ClangExpressionDeclMap::LookupFunction( Target *target = m_parser_vars->m_exe_ctx.GetTargetPtr(); - std::vector<clang::NamedDecl *> decls_from_modules; + std::vector<CompilerDecl> decls_from_modules; if (target) { if (std::shared_ptr<ClangModulesDeclVendor> decl_vendor = @@ -1314,7 +1315,8 @@ bool ClangExpressionDeclMap::LookupFunction( } if (!found_function_with_type_info) { - for (clang::NamedDecl *decl : decls_from_modules) { + for (const CompilerDecl &compiler_decl : decls_from_modules) { + clang::Decl *decl = ClangUtil::GetDecl(compiler_decl); if (llvm::isa<clang::FunctionDecl>(decl)) { clang::NamedDecl *copied_decl = llvm::cast_or_null<FunctionDecl>(CopyDecl(decl)); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp index 67984c5..b77e269 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp @@ -226,7 +226,7 @@ void StoringDiagnosticConsumer::SetCurrentModuleProgress( } ClangModulesDeclVendor::ClangModulesDeclVendor() - : ClangDeclVendor(eClangModuleDeclVendor) {} + : DeclVendor(eClangModuleDeclVendor) {} ClangModulesDeclVendor::~ClangModulesDeclVendor() = default; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h index d820552..ad4d060 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h @@ -9,17 +9,16 @@ #ifndef LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGMODULESDECLVENDOR_H #define LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGMODULESDECLVENDOR_H +#include "lldb/Symbol/DeclVendor.h" #include "lldb/Symbol/SourceModule.h" #include "lldb/Target/Platform.h" -#include "Plugins/ExpressionParser/Clang/ClangDeclVendor.h" - #include <set> #include <vector> 
namespace lldb_private { -class ClangModulesDeclVendor : public ClangDeclVendor { +class ClangModulesDeclVendor : public DeclVendor { public: // Constructors and Destructors ClangModulesDeclVendor(); diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp index 460c503..954f269 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp @@ -281,22 +281,23 @@ ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses, const size_t num_methods = addresses.size(); llvm::SmallVector<uint8_t, 0> buffer(num_methods * size, 0); - llvm::DenseSet<uint32_t> failed_indices; - for (auto [idx, addr] : llvm::enumerate(addresses)) { - Status error; - process->ReadMemory(addr, buffer.data() + idx * size, size, error); - if (error.Fail()) - failed_indices.insert(idx); - } + llvm::SmallVector<Range<addr_t, size_t>> mem_ranges = + llvm::to_vector(llvm::map_range(llvm::seq(num_methods), [&](size_t idx) { + return Range<addr_t, size_t>(addresses[idx], size); + })); + + llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results = + process->ReadMemoryRanges(mem_ranges, buffer); llvm::SmallVector<method_t, 0> methods; methods.reserve(num_methods); - for (auto [idx, addr] : llvm::enumerate(addresses)) { - if (failed_indices.contains(idx)) + for (auto [addr, memory] : llvm::zip(addresses, read_results)) { + // Ignore partial reads. + if (memory.size() != size) continue; - DataExtractor extractor(buffer.data() + idx * size, size, - process->GetByteOrder(), + + DataExtractor extractor(memory.data(), size, process->GetByteOrder(), process->GetAddressByteSize()); methods.push_back(method_t()); methods.back().Read(extractor, process, addr, relative_string_base_addr, diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp index d6d2df2..60f9893 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp @@ -131,7 +131,7 @@ private: }; AppleObjCDeclVendor::AppleObjCDeclVendor(ObjCLanguageRuntime &runtime) - : ClangDeclVendor(eAppleObjCDeclVendor), m_runtime(runtime), + : DeclVendor(eAppleObjCDeclVendor), m_runtime(runtime), m_type_realizer_sp(m_runtime.GetEncodingToType()) { m_ast_ctx = std::make_shared<TypeSystemClang>( "AppleObjCDeclVendor AST", @@ -537,83 +537,75 @@ uint32_t AppleObjCDeclVendor::FindDecls(ConstString name, bool append, if (!append) decls.clear(); - uint32_t ret = 0; + // See if the type is already in our ASTContext. - do { - // See if the type is already in our ASTContext. 
- - clang::ASTContext &ast_ctx = m_ast_ctx->getASTContext(); - - clang::IdentifierInfo &identifier_info = - ast_ctx.Idents.get(name.GetStringRef()); - clang::DeclarationName decl_name = - ast_ctx.DeclarationNames.getIdentifier(&identifier_info); - - clang::DeclContext::lookup_result lookup_result = - ast_ctx.getTranslationUnitDecl()->lookup(decl_name); - - if (!lookup_result.empty()) { - if (clang::ObjCInterfaceDecl *result_iface_decl = - llvm::dyn_cast<clang::ObjCInterfaceDecl>(*lookup_result.begin())) { - if (log) { - clang::QualType result_iface_type = - ast_ctx.getObjCInterfaceType(result_iface_decl); - - uint64_t isa_value = LLDB_INVALID_ADDRESS; - if (std::optional<ClangASTMetadata> metadata = - m_ast_ctx->GetMetadata(result_iface_decl)) - isa_value = metadata->GetISAPtr(); - - LLDB_LOGF(log, - "AOCTV::FT Found %s (isa 0x%" PRIx64 ") in the ASTContext", - result_iface_type.getAsString().data(), isa_value); - } + clang::ASTContext &ast_ctx = m_ast_ctx->getASTContext(); - decls.push_back(m_ast_ctx->GetCompilerDecl(result_iface_decl)); - ret++; - break; - } else { - LLDB_LOGF(log, "AOCTV::FT There's something in the ASTContext, but " - "it's not something we know about"); - break; + clang::IdentifierInfo &identifier_info = + ast_ctx.Idents.get(name.GetStringRef()); + clang::DeclarationName decl_name = + ast_ctx.DeclarationNames.getIdentifier(&identifier_info); + + clang::DeclContext::lookup_result lookup_result = + ast_ctx.getTranslationUnitDecl()->lookup(decl_name); + + if (!lookup_result.empty()) { + if (clang::ObjCInterfaceDecl *result_iface_decl = + llvm::dyn_cast<clang::ObjCInterfaceDecl>(*lookup_result.begin())) { + if (log) { + clang::QualType result_iface_type = + ast_ctx.getObjCInterfaceType(result_iface_decl); + + uint64_t isa_value = LLDB_INVALID_ADDRESS; + if (std::optional<ClangASTMetadata> metadata = + m_ast_ctx->GetMetadata(result_iface_decl)) + isa_value = metadata->GetISAPtr(); + + LLDB_LOGF(log, + "AOCTV::FT Found %s (isa 0x%" PRIx64 ") in the ASTContext", + result_iface_type.getAsString().data(), isa_value); } - } else if (log) { - LLDB_LOGF(log, "AOCTV::FT Couldn't find %s in the ASTContext", - name.AsCString()); + + decls.push_back(m_ast_ctx->GetCompilerDecl(result_iface_decl)); + return 1; } - // It's not. If it exists, we have to put it into our ASTContext. + LLDB_LOGF(log, "AOCTV::FT There's something in the ASTContext, but " + "it's not something we know about"); + return 0; + } - ObjCLanguageRuntime::ObjCISA isa = m_runtime.GetISA(name); + LLDB_LOGF(log, "AOCTV::FT Couldn't find %s in the ASTContext", + name.AsCString()); - if (!isa) { - LLDB_LOGF(log, "AOCTV::FT Couldn't find the isa"); + // It's not. If it exists, we have to put it into our ASTContext. 
- break; - } + ObjCLanguageRuntime::ObjCISA isa = m_runtime.GetISA(name); - clang::ObjCInterfaceDecl *iface_decl = GetDeclForISA(isa); + if (!isa) { + LLDB_LOGF(log, "AOCTV::FT Couldn't find the isa"); - if (!iface_decl) { - LLDB_LOGF(log, - "AOCTV::FT Couldn't get the Objective-C interface for " - "isa 0x%" PRIx64, - (uint64_t)isa); + return 0; + } - break; - } + clang::ObjCInterfaceDecl *iface_decl = GetDeclForISA(isa); - if (log) { - clang::QualType new_iface_type = ast_ctx.getObjCInterfaceType(iface_decl); + if (!iface_decl) { + LLDB_LOGF(log, + "AOCTV::FT Couldn't get the Objective-C interface for " + "isa 0x%" PRIx64, + (uint64_t)isa); - LLDB_LOG(log, "AOCTV::FT Created {0} (isa 0x{1:x})", - new_iface_type.getAsString(), (uint64_t)isa); - } + return 0; + } - decls.push_back(m_ast_ctx->GetCompilerDecl(iface_decl)); - ret++; - break; - } while (false); + if (log) { + clang::QualType new_iface_type = ast_ctx.getObjCInterfaceType(iface_decl); + + LLDB_LOG(log, "AOCTV::FT Created {0} (isa 0x{1:x})", + new_iface_type.getAsString(), (uint64_t)isa); + } - return ret; + decls.push_back(m_ast_ctx->GetCompilerDecl(iface_decl)); + return 1; } diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h index 3bb0f77..2cfa86d 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h @@ -11,15 +11,15 @@ #include "lldb/lldb-private.h" -#include "Plugins/ExpressionParser/Clang/ClangDeclVendor.h" #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" #include "Plugins/TypeSystem/Clang/TypeSystemClang.h" +#include "lldb/Symbol/DeclVendor.h" namespace lldb_private { class AppleObjCExternalASTSource; -class AppleObjCDeclVendor : public ClangDeclVendor { +class AppleObjCDeclVendor : public DeclVendor { public: AppleObjCDeclVendor(ObjCLanguageRuntime &runtime); diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 3176852..fb9e7eb 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -1971,6 +1971,49 @@ size_t Process::ReadMemory(addr_t addr, void *buf, size_t size, Status &error) { } } +llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> +Process::ReadMemoryRanges(llvm::ArrayRef<Range<lldb::addr_t, size_t>> ranges, + llvm::MutableArrayRef<uint8_t> buffer) { + auto total_ranges_len = llvm::sum_of( + llvm::map_range(ranges, [](auto range) { return range.size; })); + // If the buffer is not large enough, this is a programmer error. + // In production builds, gracefully fail by returning a length of 0 for all + // ranges. + assert(buffer.size() >= total_ranges_len && "provided buffer is too short"); + if (buffer.size() < total_ranges_len) { + llvm::MutableArrayRef<uint8_t> empty; + return {ranges.size(), empty}; + } + + llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> results; + + // While `buffer` has space, take the next requested range and read + // memory into a `buffer` piece, then slice it to remove the used memory. + for (auto [addr, range_len] : ranges) { + Status status; + size_t num_bytes_read = + ReadMemoryFromInferior(addr, buffer.data(), range_len, status); + // FIXME: ReadMemoryFromInferior promises to return 0 in case of errors, but + // it doesn't; it never checks for errors. 
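+    // Until that is fixed, treat any failure reported through `status` as a
+    // zero-length read.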
+    if (status.Fail())
+      num_bytes_read = 0;
+
+    assert(num_bytes_read <= range_len && "read more than requested bytes");
+    if (num_bytes_read > range_len) {
+      // In production builds, gracefully fail by returning length zero for
+      // this range.
+      results.emplace_back();
+      continue;
+    }
+
+    results.push_back(buffer.take_front(num_bytes_read));
+    // Slice buffer to remove the used memory.
+    buffer = buffer.drop_front(num_bytes_read);
+  }
+
+  return results;
+}
+
 void Process::DoFindInMemory(lldb::addr_t start_addr, lldb::addr_t end_addr,
                              const uint8_t *buf, size_t size,
                              AddressRanges &matches, size_t alignment,
diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt
index b1ace62..e3bffbc 100644
--- a/lldb/test/API/CMakeLists.txt
+++ b/lldb/test/API/CMakeLists.txt
@@ -140,7 +140,12 @@ if(CMAKE_HOST_APPLE)
 endif()
 
 if(WIN32 AND CMAKE_BUILD_TYPE STREQUAL Debug)
-  set(LLDB_PYTHON_API_TEST_EXECUTABLE "${Python3_EXECUTABLE_DEBUG}")
+  if(${CMAKE_VERSION} VERSION_LESS "3.30")
+    message(WARNING "CMake version is older than 3.30. Some lldb tests will fail.")
+    set(LLDB_PYTHON_API_TEST_EXECUTABLE "${Python3_EXECUTABLE}")
+  else()
+    set(LLDB_PYTHON_API_TEST_EXECUTABLE "${Python3_EXECUTABLE_DEBUG}")
+  endif()
 else()
   set(LLDB_PYTHON_API_TEST_EXECUTABLE "${Python3_EXECUTABLE}")
 endif()
diff --git a/lldb/unittests/Target/MemoryTest.cpp b/lldb/unittests/Target/MemoryTest.cpp
index 4a96730..f7b4e97 100644
--- a/lldb/unittests/Target/MemoryTest.cpp
+++ b/lldb/unittests/Target/MemoryTest.cpp
@@ -17,6 +17,7 @@
 #include "lldb/Utility/ArchSpec.h"
 #include "lldb/Utility/DataBufferHeap.h"
 #include "gtest/gtest.h"
+#include <cstdint>
 
 using namespace lldb_private;
 using namespace lldb;
@@ -225,3 +226,149 @@ TEST_F(MemoryTest, TesetMemoryCacheRead) {
                                        // instead of using an
                                        // old cache
 }
+
+/// A process class that, when asked to read memory at some address X, fills
+/// each returned byte with the least significant byte of that byte's address.
+class DummyReaderProcess : public Process {
+public:
+  // If true, `DoReadMemory` will not return all requested bytes.
+  // It's not possible to control exactly how many bytes will be read, because
+  // Process::ReadMemoryFromInferior tries to fulfill the entire request by
+  // reading smaller chunks until it gets nothing back.
+  bool read_less_than_requested = false;
+  bool read_more_than_requested = false;
+
+  size_t DoReadMemory(lldb::addr_t vm_addr, void *buf, size_t size,
+                      Status &error) override {
+    if (read_less_than_requested && size > 0)
+      size--;
+    // Report more bytes than requested without ever writing past the end of
+    // `buf`, which is only `size` bytes long.
+    const size_t bytes_to_write = size;
+    if (read_more_than_requested)
+      size *= 2;
+    uint8_t *buffer = static_cast<uint8_t *>(buf);
+    for (size_t addr = vm_addr; addr < vm_addr + bytes_to_write; addr++)
+      buffer[addr - vm_addr] = static_cast<uint8_t>(addr); // LSB of addr.
+    return size;
+  }
+  // Boilerplate, nothing interesting below.
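+  // The remaining overrides only satisfy Process's abstract interface so the
+  // test can instantiate the class; none of them do anything meaningful.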
+ DummyReaderProcess(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp) + : Process(target_sp, listener_sp) {} + bool CanDebug(lldb::TargetSP, bool) override { return true; } + Status DoDestroy() override { return {}; } + void RefreshStateAfterStop() override {} + bool DoUpdateThreadList(ThreadList &, ThreadList &) override { return false; } + llvm::StringRef GetPluginName() override { return "Dummy"; } +}; + +TEST_F(MemoryTest, TestReadMemoryRanges) { + ArchSpec arch("x86_64-apple-macosx-"); + + Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); + + DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + + TargetSP target_sp = CreateTarget(debugger_sp, arch); + ASSERT_TRUE(target_sp); + + ListenerSP listener_sp(Listener::MakeListener("dummy")); + ProcessSP process_sp = + std::make_shared<DummyReaderProcess>(target_sp, listener_sp); + ASSERT_TRUE(process_sp); + + { + llvm::SmallVector<uint8_t, 0> buffer(1024, 0); + // Read 8 ranges of 128 bytes with arbitrary base addresses. + llvm::SmallVector<Range<addr_t, size_t>> ranges = { + {0x12345, 128}, {0x11112222, 128}, {0x77777777, 128}, + {0xffaabbccdd, 128}, {0x0, 128}, {0x4242424242, 128}, + {0x17171717, 128}, {0x99999, 128}}; + + llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results = + process_sp->ReadMemoryRanges(ranges, buffer); + + for (auto [range, memory] : llvm::zip(ranges, read_results)) { + ASSERT_EQ(memory.size(), 128u); + addr_t range_base = range.GetRangeBase(); + for (auto [idx, byte] : llvm::enumerate(memory)) + ASSERT_EQ(byte, static_cast<uint8_t>(range_base + idx)); + } + } + + auto &dummy_process = static_cast<DummyReaderProcess &>(*process_sp); + dummy_process.read_less_than_requested = true; + { + llvm::SmallVector<uint8_t, 0> buffer(1024, 0); + llvm::SmallVector<Range<addr_t, size_t>> ranges = { + {0x12345, 128}, {0x11112222, 128}, {0x77777777, 128}}; + llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results = + dummy_process.ReadMemoryRanges(ranges, buffer); + for (auto [range, memory] : llvm::zip(ranges, read_results)) { + ASSERT_LT(memory.size(), 128u); + addr_t range_base = range.GetRangeBase(); + for (auto [idx, byte] : llvm::enumerate(memory)) + ASSERT_EQ(byte, static_cast<uint8_t>(range_base + idx)); + } + } +} + +using MemoryDeathTest = MemoryTest; + +TEST_F(MemoryDeathTest, TestReadMemoryRangesReturnsTooMuch) { + ArchSpec arch("x86_64-apple-macosx-"); + Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); + DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + TargetSP target_sp = CreateTarget(debugger_sp, arch); + ASSERT_TRUE(target_sp); + ListenerSP listener_sp(Listener::MakeListener("dummy")); + ProcessSP process_sp = + std::make_shared<DummyReaderProcess>(target_sp, listener_sp); + ASSERT_TRUE(process_sp); + + auto &dummy_process = static_cast<DummyReaderProcess &>(*process_sp); + dummy_process.read_more_than_requested = true; + llvm::SmallVector<uint8_t, 0> buffer(1024, 0); + llvm::SmallVector<Range<addr_t, size_t>> ranges = {{0x12345, 128}}; + + llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results; + ASSERT_DEBUG_DEATH( + { read_results = process_sp->ReadMemoryRanges(ranges, buffer); }, + "read more than requested bytes"); +#ifdef NDEBUG + // With asserts off, the read should return empty ranges. 
+ ASSERT_EQ(read_results.size(), 1u); + ASSERT_TRUE(read_results[0].empty()); +#endif +} + +TEST_F(MemoryDeathTest, TestReadMemoryRangesWithShortBuffer) { + ArchSpec arch("x86_64-apple-macosx-"); + Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); + DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + TargetSP target_sp = CreateTarget(debugger_sp, arch); + ASSERT_TRUE(target_sp); + ListenerSP listener_sp(Listener::MakeListener("dummy")); + ProcessSP process_sp = + std::make_shared<DummyReaderProcess>(target_sp, listener_sp); + ASSERT_TRUE(process_sp); + + llvm::SmallVector<uint8_t, 0> short_buffer(10, 0); + llvm::SmallVector<Range<addr_t, size_t>> ranges = {{0x12345, 128}, + {0x11, 128}}; + llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results; + ASSERT_DEBUG_DEATH( + { read_results = process_sp->ReadMemoryRanges(ranges, short_buffer); }, + "provided buffer is too short"); +#ifdef NDEBUG + // With asserts off, the read should return empty ranges. + ASSERT_EQ(read_results.size(), ranges.size()); + for (llvm::MutableArrayRef<uint8_t> result : read_results) + ASSERT_TRUE(result.empty()); +#endif +} diff --git a/llvm/docs/CommandGuide/llvm-dwarfdump.rst b/llvm/docs/CommandGuide/llvm-dwarfdump.rst index 27ad4226..1378302 100644 --- a/llvm/docs/CommandGuide/llvm-dwarfdump.rst +++ b/llvm/docs/CommandGuide/llvm-dwarfdump.rst @@ -83,7 +83,7 @@ OPTIONS .. option:: -n <name>, --name=<name> Find and print all debug info entries whose name - (`DW_AT_name` attribute) is <name>. + (`DW_AT_name`/`DW_AT_linkage_name` attribute) is <name>. .. option:: --lookup=<address> diff --git a/llvm/include/llvm/ADT/Twine.h b/llvm/include/llvm/ADT/Twine.h index d9f9c0f..e3b4d5e 100644 --- a/llvm/include/llvm/ADT/Twine.h +++ b/llvm/include/llvm/ADT/Twine.h @@ -285,7 +285,7 @@ public: } /// Construct from a StringRef. - /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) { + /*implicit*/ Twine(StringRef Str) : LHSKind(PtrAndLengthKind) { LHS.ptrAndLength.ptr = Str.data(); LHS.ptrAndLength.length = Str.size(); assert(isValid() && "Invalid twine!"); @@ -352,7 +352,7 @@ public: // right thing. Yet. /// Construct as the concatenation of a C string and a StringRef. - /*implicit*/ Twine(const char *LHS, const StringRef &RHS) + /*implicit*/ Twine(const char *LHS, StringRef RHS) : LHSKind(CStringKind), RHSKind(PtrAndLengthKind) { this->LHS.cString = LHS; this->RHS.ptrAndLength.ptr = RHS.data(); @@ -361,7 +361,7 @@ public: } /// Construct as the concatenation of a StringRef and a C string. - /*implicit*/ Twine(const StringRef &LHS, const char *RHS) + /*implicit*/ Twine(StringRef LHS, const char *RHS) : LHSKind(PtrAndLengthKind), RHSKind(CStringKind) { this->LHS.ptrAndLength.ptr = LHS.data(); this->LHS.ptrAndLength.length = LHS.size(); @@ -530,14 +530,14 @@ inline Twine operator+(const Twine &LHS, const Twine &RHS) { /// Additional overload to guarantee simplified codegen; this is equivalent to /// concat(). -inline Twine operator+(const char *LHS, const StringRef &RHS) { +inline Twine operator+(const char *LHS, StringRef RHS) { return Twine(LHS, RHS); } /// Additional overload to guarantee simplified codegen; this is equivalent to /// concat(). 
-inline Twine operator+(const StringRef &LHS, const char *RHS) { +inline Twine operator+(StringRef LHS, const char *RHS) { return Twine(LHS, RHS); } diff --git a/llvm/include/llvm/CodeGen/MIR2Vec.h b/llvm/include/llvm/CodeGen/MIR2Vec.h index 7b1b5d9..f6b0571 100644 --- a/llvm/include/llvm/CodeGen/MIR2Vec.h +++ b/llvm/include/llvm/CodeGen/MIR2Vec.h @@ -52,11 +52,21 @@ class LLVMContext; class MIR2VecVocabLegacyAnalysis; class TargetInstrInfo; +enum class MIR2VecKind { Symbolic }; + namespace mir2vec { + +// Forward declarations +class MIREmbedder; +class SymbolicMIREmbedder; + extern llvm::cl::OptionCategory MIR2VecCategory; extern cl::opt<float> OpcWeight; using Embedding = ir2vec::Embedding; +using MachineInstEmbeddingsMap = DenseMap<const MachineInstr *, Embedding>; +using MachineBlockEmbeddingsMap = + DenseMap<const MachineBasicBlock *, Embedding>; /// Class for storing and accessing the MIR2Vec vocabulary. /// The MIRVocabulary class manages seed embeddings for LLVM Machine IR @@ -107,19 +117,91 @@ public: const_iterator end() const { return Storage.end(); } - /// Total number of entries in the vocabulary - size_t getCanonicalSize() const { return Storage.size(); } - MIRVocabulary() = delete; /// Factory method to create MIRVocabulary from vocabulary map static Expected<MIRVocabulary> create(VocabMap &&Entries, const TargetInstrInfo &TII); + /// Create a dummy vocabulary for testing purposes. + static Expected<MIRVocabulary> + createDummyVocabForTest(const TargetInstrInfo &TII, unsigned Dim = 1); + + /// Total number of entries in the vocabulary + size_t getCanonicalSize() const { return Storage.size(); } + private: MIRVocabulary(VocabMap &&Entries, const TargetInstrInfo &TII); }; +/// Base class for MIR embedders +class MIREmbedder { +protected: + const MachineFunction &MF; + const MIRVocabulary &Vocab; + + /// Dimension of the embeddings; Captured from the vocabulary + const unsigned Dimension; + + /// Weight for opcode embeddings + const float OpcWeight; + + MIREmbedder(const MachineFunction &MF, const MIRVocabulary &Vocab) + : MF(MF), Vocab(Vocab), Dimension(Vocab.getDimension()), + OpcWeight(mir2vec::OpcWeight) {} + + /// Function to compute embeddings. + Embedding computeEmbeddings() const; + + /// Function to compute the embedding for a given machine basic block. + Embedding computeEmbeddings(const MachineBasicBlock &MBB) const; + + /// Function to compute the embedding for a given machine instruction. + /// Specific to the kind of embeddings being computed. + virtual Embedding computeEmbeddings(const MachineInstr &MI) const = 0; + +public: + virtual ~MIREmbedder() = default; + + /// Factory method to create an Embedder object of the specified kind + /// Returns nullptr if the requested kind is not supported. + static std::unique_ptr<MIREmbedder> create(MIR2VecKind Mode, + const MachineFunction &MF, + const MIRVocabulary &Vocab); + + /// Computes and returns the embedding for a given machine instruction MI in + /// the machine function MF. + Embedding getMInstVector(const MachineInstr &MI) const { + return computeEmbeddings(MI); + } + + /// Computes and returns the embedding for a given machine basic block in the + /// machine function MF. + Embedding getMBBVector(const MachineBasicBlock &MBB) const { + return computeEmbeddings(MBB); + } + + /// Computes and returns the embedding for the current machine function. + Embedding getMFunctionVector() const { + // Currently, we always (re)compute the embeddings for the function. This is + // cheaper than caching the vector. 
+ return computeEmbeddings(); + } +}; + +/// Class for computing Symbolic embeddings +/// Symbolic embeddings are constructed based on the entity-level +/// representations obtained from the MIR Vocabulary. +class SymbolicMIREmbedder : public MIREmbedder { +private: + Embedding computeEmbeddings(const MachineInstr &MI) const override; + +public: + SymbolicMIREmbedder(const MachineFunction &F, const MIRVocabulary &Vocab); + static std::unique_ptr<SymbolicMIREmbedder> + create(const MachineFunction &MF, const MIRVocabulary &Vocab); +}; + } // namespace mir2vec /// Pass to analyze and populate MIR2Vec vocabulary from a module @@ -166,6 +248,31 @@ public: } }; +/// This pass prints the MIR2Vec embeddings for machine functions, basic blocks, +/// and instructions +class MIR2VecPrinterLegacyPass : public MachineFunctionPass { + raw_ostream &OS; + +public: + static char ID; + explicit MIR2VecPrinterLegacyPass(raw_ostream &OS) + : MachineFunctionPass(ID), OS(OS) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MIR2VecVocabLegacyAnalysis>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { + return "MIR2Vec Embedder Printer Pass"; + } +}; + +/// Create a machine pass that prints MIR2Vec embeddings +MachineFunctionPass *createMIR2VecPrinterLegacyPass(raw_ostream &OS); + } // namespace llvm #endif // LLVM_CODEGEN_MIR2VEC_H
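For orientation, a minimal driver for the embedder API declared above might look like the following. This is a sketch only, not part of the patch; it mirrors what MIR2VecPrinterLegacyPass does later in this diff, and assumes a parsed vocabulary map `Entries`, a `MachineFunction MF`, and its `TargetInstrInfo TII` are already in scope:

  // Build a vocabulary, then a symbolic embedder, and print the
  // function-level embedding (sketch; error handling kept minimal).
  auto VocabOrErr = mir2vec::MIRVocabulary::create(std::move(Entries), TII);
  if (!VocabOrErr)
    llvm::report_fatal_error(VocabOrErr.takeError());
  if (auto Embedder =
          mir2vec::MIREmbedder::create(MIR2VecKind::Symbolic, MF, *VocabOrErr))
    Embedder->getMFunctionVector().print(llvm::errs());

Per the header comment above, getMFunctionVector() recomputes the embedding on each call, so callers that need it repeatedly should cache the result themselves.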
\ No newline at end of file diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 272b4ac..7fae550 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -93,6 +93,10 @@ createMachineFunctionPrinterPass(raw_ostream &OS, LLVM_ABI MachineFunctionPass * createMIR2VecVocabPrinterLegacyPass(raw_ostream &OS); +/// MIR2VecPrinter pass - This pass prints out the MIR2Vec embeddings for +/// machine functions, basic blocks and instructions. +LLVM_ABI MachineFunctionPass *createMIR2VecPrinterLegacyPass(raw_ostream &OS); + /// StackFramePrinter pass - This pass prints out the machine function's /// stack frame to the given stream as a debugging tool. LLVM_ABI MachineFunctionPass *createStackFrameLayoutAnalysisPass(); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 73f2c55..64a7563 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2459,6 +2459,12 @@ public: return ISD::ANY_EXTEND; } + /// Returns how the platform's atomic rmw operations expect their input + /// argument to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). + virtual ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const { + return ISD::ANY_EXTEND; + } + /// @} /// Returns true if we should normalize diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 12d1c25..e6cce9a4 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2851,7 +2851,15 @@ def int_ptrauth_blend : def int_ptrauth_sign_generic : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; +//===----------------- AllocToken Intrinsics ------------------------------===// + +// Return the token ID for the given !alloc_token metadata. 
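+// Illustrative call (hypothetical IR; assumes the i64 overload, with the
+// operand format !{<type-name>, <contains-pointer>} used by AllocToken.cpp):
+//   %id = call i64 @llvm.alloc.token.id.i64(metadata !0)
+//   !0 = !{!"int", i1 false}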
+def int_alloc_token_id : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_metadata_ty], + [IntrNoMem, NoUndef<RetIndex>]>; + //===----------------------------------------------------------------------===// + //===------- Convergence Intrinsics ---------------------------------------===// def int_experimental_convergence_entry diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index ada3523..0135989 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -236,7 +236,7 @@ private: static bool hasAEABILibcalls(const Triple &TT) { return TT.isTargetAEABI() || TT.isTargetGNUAEABI() || - TT.isTargetMuslAEABI() || TT.isAndroid(); + TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isAndroid(); } LLVM_READONLY diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index cd774e7..d507ba2 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -222,6 +222,7 @@ LLVM_ABI void initializeMachineSanitizerBinaryMetadataLegacyPass(PassRegistry &); LLVM_ABI void initializeMIR2VecVocabLegacyAnalysisPass(PassRegistry &); LLVM_ABI void initializeMIR2VecVocabPrinterLegacyPassPass(PassRegistry &); +LLVM_ABI void initializeMIR2VecPrinterLegacyPassPass(PassRegistry &); LLVM_ABI void initializeMachineSchedulerLegacyPass(PassRegistry &); LLVM_ABI void initializeMachineSinkingLegacyPass(PassRegistry &); LLVM_ABI void initializeMachineTraceMetricsWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index dc8cd86d..5e43444 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -935,7 +935,8 @@ public: getEnvironment() == Triple::GNUEABIHF || getEnvironment() == Triple::GNUEABIHFT64 || getEnvironment() == Triple::OpenHOS || - getEnvironment() == Triple::MuslEABIHF || isAndroid()) && + getEnvironment() == Triple::MuslEABIHF || isOSFuchsia() || + isAndroid()) && isOSBinFormatELF() && !isOSNetBSD(); } diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 8da51d0..b573023 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4866,6 +4866,89 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F, return nullptr; } +/// Look for the following pattern and simplify %to_fold to %identicalPhi. +/// Here %phi, %to_fold and %phi.next perform the same functionality as +/// %identicalPhi and hence the select instruction %to_fold can be folded +/// into %identicalPhi. +/// +/// BB1: +/// %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ] +/// %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ] +/// ... +/// %identicalPhi.next = select %cmp, %val, %identicalPhi +/// (or select %cmp, %identicalPhi, %val) +/// %to_fold = select %cmp2, %identicalPhi, %phi +/// %phi.next = select %cmp, %val, %to_fold +/// (or select %cmp, %to_fold, %val) +/// +/// Prove that %phi and %identicalPhi are the same by induction: +/// +/// Base case: Both %phi and %identicalPhi are equal on entry to the loop. +/// Inductive case: +/// Suppose %phi and %identicalPhi are equal at iteration i. +/// We look at their values at iteration i+1 which are %phi.next and +/// %identicalPhi.next. 
They would have become different only when %cmp is
+/// false and the corresponding values %to_fold and %identicalPhi differ
+/// (a similar argument applies to the parenthesized "or" forms above).
+///
+/// The only case in which %to_fold and %identicalPhi could differ is when %cmp2
+/// is false and %to_fold is %phi, which contradicts our inductive hypothesis
+/// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
+/// always equal at iteration i+1.
+bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) {
+  if (PN.getParent() != IdenticalPN.getParent())
+    return false;
+  if (PN.getNumIncomingValues() != 2)
+    return false;
+
+  // Check that only the backedge incoming value is different.
+  unsigned DiffVals = 0;
+  BasicBlock *DiffValBB = nullptr;
+  for (unsigned i = 0; i < 2; i++) {
+    BasicBlock *PredBB = PN.getIncomingBlock(i);
+    if (PN.getIncomingValueForBlock(PredBB) !=
+        IdenticalPN.getIncomingValueForBlock(PredBB)) {
+      DiffVals++;
+      DiffValBB = PredBB;
+    }
+  }
+  if (DiffVals != 1)
+    return false;
+  // Now check that the backedge incoming values are two select
+  // instructions with the same condition. Either their true
+  // values are the same, or their false values are the same.
+  auto *SI = dyn_cast<SelectInst>(PN.getIncomingValueForBlock(DiffValBB));
+  auto *IdenticalSI =
+      dyn_cast<SelectInst>(IdenticalPN.getIncomingValueForBlock(DiffValBB));
+  if (!SI || !IdenticalSI)
+    return false;
+  if (SI->getCondition() != IdenticalSI->getCondition())
+    return false;
+
+  SelectInst *SIOtherVal = nullptr;
+  Value *IdenticalSIOtherVal = nullptr;
+  if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
+    SIOtherVal = dyn_cast<SelectInst>(SI->getFalseValue());
+    IdenticalSIOtherVal = IdenticalSI->getFalseValue();
+  } else if (SI->getFalseValue() == IdenticalSI->getFalseValue()) {
+    SIOtherVal = dyn_cast<SelectInst>(SI->getTrueValue());
+    IdenticalSIOtherVal = IdenticalSI->getTrueValue();
+  } else {
+    return false;
+  }
+
+  // Now check that the other values in the selects, i.e., %to_fold and
+  // %identicalPhi, are essentially the same value.
+  if (!SIOtherVal || IdenticalSIOtherVal != &IdenticalPN)
+    return false;
+  if (!(SIOtherVal->getTrueValue() == &IdenticalPN &&
+        SIOtherVal->getFalseValue() == &PN) &&
+      !(SIOtherVal->getTrueValue() == &PN &&
+        SIOtherVal->getFalseValue() == &IdenticalPN))
+    return false;
+  return true;
+}
+
 /// Given operands for a SelectInst, see if we can fold the result.
 /// If not, this returns null.
 static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -5041,7 +5124,14 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
   std::optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
   if (Imp)
     return *Imp ? TrueVal : FalseVal;
-
+  // Look for identical PHIs in the true and false values.
+ if (auto *TruePHI = dyn_cast<PHINode>(TrueVal)) + if (auto *FalsePHI = dyn_cast<PHINode>(FalseVal)) { + if (isSimplifierIdenticalPHI(*TruePHI, *FalsePHI)) + return FalseVal; + if (isSimplifierIdenticalPHI(*FalsePHI, *TruePHI)) + return TrueVal; + } return nullptr; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index e2af0c5..a114406 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1438,7 +1438,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, BBFreqEnabled, BrProbEnabled, MF.hasBBSections() && NumMBBSectionRanges > 1, - static_cast<bool>(BBAddrMapSkipEmitBBEntries), + BBAddrMapSkipEmitBBEntries, HasCalls, false}; } diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index c438eae..9795a0b 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -98,6 +98,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineUniformityAnalysisPassPass(Registry); initializeMIR2VecVocabLegacyAnalysisPass(Registry); initializeMIR2VecVocabPrinterLegacyPassPass(Registry); + initializeMIR2VecPrinterLegacyPassPass(Registry); initializeMachineUniformityInfoPrinterPassPass(Registry); initializeMachineVerifierLegacyPassPass(Registry); initializeObjCARCContractLegacyPassPass(Registry); diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp index 5c78d98..99be1fc0 100644 --- a/llvm/lib/CodeGen/MIR2Vec.cpp +++ b/llvm/lib/CodeGen/MIR2Vec.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MIR2Vec.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Module.h" @@ -29,20 +30,30 @@ using namespace mir2vec; STATISTIC(MIRVocabMissCounter, "Number of lookups to MIR entities not present in the vocabulary"); -cl::OptionCategory llvm::mir2vec::MIR2VecCategory("MIR2Vec Options"); +namespace llvm { +namespace mir2vec { +cl::OptionCategory MIR2VecCategory("MIR2Vec Options"); // FIXME: Use a default vocab when not specified static cl::opt<std::string> VocabFile("mir2vec-vocab-path", cl::Optional, cl::desc("Path to the vocabulary file for MIR2Vec"), cl::init(""), cl::cat(MIR2VecCategory)); -cl::opt<float> - llvm::mir2vec::OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0), - cl::desc("Weight for machine opcode embeddings"), - cl::cat(MIR2VecCategory)); +cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0), + cl::desc("Weight for machine opcode embeddings"), + cl::cat(MIR2VecCategory)); +cl::opt<MIR2VecKind> MIR2VecEmbeddingKind( + "mir2vec-kind", cl::Optional, + cl::values(clEnumValN(MIR2VecKind::Symbolic, "symbolic", + "Generate symbolic embeddings for MIR")), + cl::init(MIR2VecKind::Symbolic), cl::desc("MIR2Vec embedding kind"), + cl::cat(MIR2VecCategory)); + +} // namespace mir2vec +} // namespace llvm //===----------------------------------------------------------------------===// -// Vocabulary Implementation +// Vocabulary //===----------------------------------------------------------------------===// MIRVocabulary::MIRVocabulary(VocabMap &&OpcodeEntries, @@ -188,6 +199,28 @@ void MIRVocabulary::buildCanonicalOpcodeMapping() { << " unique base opcodes\n"); } +Expected<MIRVocabulary> +MIRVocabulary::createDummyVocabForTest(const TargetInstrInfo &TII, + unsigned Dim) { + assert(Dim > 0 && "Dimension must be greater than 
zero"); + + float DummyVal = 0.1f; + + // Create dummy embeddings for all canonical opcode names + VocabMap DummyVocabMap; + for (unsigned Opcode = 0; Opcode < TII.getNumOpcodes(); ++Opcode) { + std::string BaseOpcode = extractBaseOpcodeName(TII.getName(Opcode)); + if (DummyVocabMap.count(BaseOpcode) == 0) { + // Only add if not already present + DummyVocabMap[BaseOpcode] = Embedding(Dim, DummyVal); + DummyVal += 0.1f; + } + } + + // Create and return vocabulary with dummy embeddings + return MIRVocabulary::create(std::move(DummyVocabMap), TII); +} + //===----------------------------------------------------------------------===// // MIR2VecVocabLegacyAnalysis Implementation //===----------------------------------------------------------------------===// @@ -258,7 +291,73 @@ MIR2VecVocabLegacyAnalysis::getMIR2VecVocabulary(const Module &M) { } //===----------------------------------------------------------------------===// -// Printer Passes Implementation +// MIREmbedder and its subclasses +//===----------------------------------------------------------------------===// + +std::unique_ptr<MIREmbedder> MIREmbedder::create(MIR2VecKind Mode, + const MachineFunction &MF, + const MIRVocabulary &Vocab) { + switch (Mode) { + case MIR2VecKind::Symbolic: + return std::make_unique<SymbolicMIREmbedder>(MF, Vocab); + } + return nullptr; +} + +Embedding MIREmbedder::computeEmbeddings(const MachineBasicBlock &MBB) const { + Embedding MBBVector(Dimension, 0); + + // Get instruction info for opcode name resolution + const auto &Subtarget = MF.getSubtarget(); + const auto *TII = Subtarget.getInstrInfo(); + if (!TII) { + MF.getFunction().getContext().emitError( + "MIR2Vec: No TargetInstrInfo available; cannot compute embeddings"); + return MBBVector; + } + + // Process each machine instruction in the basic block + for (const auto &MI : MBB) { + // Skip debug instructions and other metadata + if (MI.isDebugInstr()) + continue; + MBBVector += computeEmbeddings(MI); + } + + return MBBVector; +} + +Embedding MIREmbedder::computeEmbeddings() const { + Embedding MFuncVector(Dimension, 0); + + // Consider all reachable machine basic blocks in the function + for (const auto *MBB : depth_first(&MF)) + MFuncVector += computeEmbeddings(*MBB); + return MFuncVector; +} + +SymbolicMIREmbedder::SymbolicMIREmbedder(const MachineFunction &MF, + const MIRVocabulary &Vocab) + : MIREmbedder(MF, Vocab) {} + +std::unique_ptr<SymbolicMIREmbedder> +SymbolicMIREmbedder::create(const MachineFunction &MF, + const MIRVocabulary &Vocab) { + return std::make_unique<SymbolicMIREmbedder>(MF, Vocab); +} + +Embedding SymbolicMIREmbedder::computeEmbeddings(const MachineInstr &MI) const { + // Skip debug instructions and other metadata + if (MI.isDebugInstr()) + return Embedding(Dimension, 0); + + // Todo: Add operand/argument contributions + + return Vocab[MI.getOpcode()]; +} + +//===----------------------------------------------------------------------===// +// Printer Passes //===----------------------------------------------------------------------===// char MIR2VecVocabPrinterLegacyPass::ID = 0; @@ -297,3 +396,56 @@ MachineFunctionPass * llvm::createMIR2VecVocabPrinterLegacyPass(raw_ostream &OS) { return new MIR2VecVocabPrinterLegacyPass(OS); } + +char MIR2VecPrinterLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(MIR2VecPrinterLegacyPass, "print-mir2vec", + "MIR2Vec Embedder Printer Pass", false, true) +INITIALIZE_PASS_DEPENDENCY(MIR2VecVocabLegacyAnalysis) +INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass) 
+INITIALIZE_PASS_END(MIR2VecPrinterLegacyPass, "print-mir2vec", + "MIR2Vec Embedder Printer Pass", false, true) + +bool MIR2VecPrinterLegacyPass::runOnMachineFunction(MachineFunction &MF) { + auto &Analysis = getAnalysis<MIR2VecVocabLegacyAnalysis>(); + auto VocabOrErr = + Analysis.getMIR2VecVocabulary(*MF.getFunction().getParent()); + assert(VocabOrErr && "Failed to get MIR2Vec vocabulary"); + auto &MIRVocab = *VocabOrErr; + + auto Emb = mir2vec::MIREmbedder::create(MIR2VecEmbeddingKind, MF, MIRVocab); + if (!Emb) { + OS << "Error creating MIR2Vec embeddings for function " << MF.getName() + << "\n"; + return false; + } + + OS << "MIR2Vec embeddings for machine function " << MF.getName() << ":\n"; + OS << "Machine Function vector: "; + Emb->getMFunctionVector().print(OS); + + OS << "Machine basic block vectors:\n"; + for (const MachineBasicBlock &MBB : MF) { + OS << "Machine basic block: " << MBB.getFullName() << ":\n"; + Emb->getMBBVector(MBB).print(OS); + } + + OS << "Machine instruction vectors:\n"; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + // Skip debug instructions as they are not + // embedded + if (MI.isDebugInstr()) + continue; + + OS << "Machine instruction: "; + MI.print(OS); + Emb->getMInstVector(MI).print(OS); + } + } + + return false; +} + +MachineFunctionPass *llvm::createMIR2VecPrinterLegacyPass(raw_ostream &OS) { + return new MIR2VecPrinterLegacyPass(OS); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 437d0f4..bf1abfe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -3765,6 +3765,8 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: case ISD::LRINT: case ISD::LLRINT: + case ISD::LROUND: + case ISD::LLROUND: Res = SoftPromoteHalfOp_Op0WithStrict(N); break; case ISD::FP_TO_SINT_SAT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 88a4a8b..b1776ea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -429,7 +429,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { - SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op2 = N->getOperand(2); + switch (TLI.getExtendForAtomicRMWArg(N->getOpcode())) { + case ISD::SIGN_EXTEND: + Op2 = SExtPromotedInteger(Op2); + break; + case ISD::ZERO_EXTEND: + Op2 = ZExtPromotedInteger(Op2); + break; + case ISD::ANY_EXTEND: + Op2 = GetPromotedInteger(Op2); + break; + default: + llvm_unreachable("Invalid atomic op extension"); + } SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 42ec8ba..7cce033 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in { } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in { - defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>; - defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>; - defm 
V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>; - defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>; + defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; + defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; + defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; + defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; } defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 35e1127..b1a668e 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1089,7 +1089,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, // Register based DivRem for AEABI (RTABI 4.2) if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() || - TT.isTargetMuslAEABI() || TT.isOSWindows()) { + TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isOSWindows()) { setOperationAction(ISD::SREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); HasStandaloneRem = false; @@ -1353,6 +1353,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, setOperationAction(ISD::FLOG10, MVT::f16, Promote); setOperationAction(ISD::FLOG2, MVT::f16, Promote); setOperationAction(ISD::LRINT, MVT::f16, Expand); + setOperationAction(ISD::LROUND, MVT::f16, Expand); setOperationAction(ISD::FROUND, MVT::f16, Legal); setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); @@ -20574,7 +20575,7 @@ static TargetLowering::ArgListTy getDivRemArgList( SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || - Subtarget->isTargetWindows()) && + Subtarget->isTargetFuchsia() || Subtarget->isTargetWindows()) && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index b2d368e..4a0883c 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -343,6 +343,7 @@ public: bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); } bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); } bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); } + bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } bool isTargetWindows() const { return TargetTriple.isOSWindows(); } diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp index c8866bf..42e90f0 100644 --- a/llvm/lib/Target/DirectX/DXILPrepare.cpp +++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp @@ -294,6 +294,14 @@ public: if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures")) RootSignature->eraseFromParent(); + // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and + // causes all tests using the DXIL Validator to fail. 
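+  // (DXIL is pinned to the LLVM 3.7 bitcode format, which is why named
+  // metadata unknown to that release has to be stripped before validation.)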
+ // + // This is a temporary fix and should be replaced with a whitelist once + // we have determined all metadata that the DXIL Validator allows + if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa")) + ErrNo->eraseFromParent(); + return true; } diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index a94e131..54c8972 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -117,8 +117,10 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - if (Subtarget.useHVX128BOps()) + if (Subtarget.useHVX128BOps()) { setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); + setOperationAction(ISD::BITCAST, MVT::v64i1, Custom); + } if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { @@ -2024,13 +2026,9 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { // Handle bitcast from i32, v2i16, and v4i8 to v32i1. // Splat the input into a 32-element i32 vector, then AND each element // with a unique bitmask to isolate individual bits. - if (ResTy == MVT::v32i1 && - (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) && - Subtarget.useHVX128BOps()) { - SDValue Val32 = Val; - if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8) - Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val); - + auto bitcastI32ToV32I1 = [&](SDValue Val32) { + assert(Val32.getValueType().getSizeInBits() == 32 && + "Input must be 32 bits"); MVT VecTy = MVT::getVectorVT(MVT::i32, 32); SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32); SmallVector<SDValue, 32> Mask; @@ -2039,7 +2037,31 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask); SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec); - return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded); + return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded); + }; + // === Case: v32i1 === + if (ResTy == MVT::v32i1 && + (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) && + Subtarget.useHVX128BOps()) { + SDValue Val32 = Val; + if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8) + Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val); + return bitcastI32ToV32I1(Val32); + } + // === Case: v64i1 === + if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) { + // Split i64 into lo/hi 32-bit halves. + SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val); + SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val, + DAG.getConstant(32, dl, MVT::i64)); + SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted); + + // Reuse the same 32-bit logic twice. + SDValue LoRes = bitcastI32ToV32I1(Lo); + SDValue HiRes = bitcastI32ToV32I1(Hi); + + // Concatenate into a v64i1 predicate. 
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes); } if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) { diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index 5dd4bf4..98b636e 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -109,12 +109,70 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, // expanded instructions for each pseudo is correct in the Size field of the // tablegen definition for the pseudo. switch (MBBI->getOpcode()) { + case RISCV::PseudoAtomicSwap32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32, + NextMBBI); + case RISCV::PseudoAtomicSwap64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadAdd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadAdd64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadSub32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadSub64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadAnd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadAnd64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadOr32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI); + case RISCV::PseudoAtomicLoadOr64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64, NextMBBI); + case RISCV::PseudoAtomicLoadXor32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadXor64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64, + NextMBBI); case RISCV::PseudoAtomicLoadNand32: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32, NextMBBI); case RISCV::PseudoAtomicLoadNand64: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64, NextMBBI); + case RISCV::PseudoAtomicLoadMin32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadMin64: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadMax32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadMax64: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadUMin32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadUMin64: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64, + NextMBBI); + case RISCV::PseudoAtomicLoadUMax32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32, + NextMBBI); + case RISCV::PseudoAtomicLoadUMax64: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64, + NextMBBI); case RISCV::PseudoMaskedAtomicSwap32: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32, NextMBBI); @@ -277,6 +335,36 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, switch (BinOp) { default: llvm_unreachable("Unexpected AtomicRMW 
BinOp"); + case AtomicRMWInst::Xchg: + BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg) + .addReg(IncrReg) + .addImm(0); + break; + case AtomicRMWInst::Add: + BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Sub: + BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::And: + BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Or: + BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Xor: + BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; case AtomicRMWInst::Nand: BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg) .addReg(DestReg) @@ -433,38 +521,85 @@ static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL, .addReg(ShamtReg); } -bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, - MachineBasicBlock::iterator &NextMBBI) { - assert(IsMasked == true && - "Should only need to expand masked atomic max/min"); - assert(Width == 32 && "Should never need to expand masked 64-bit operations"); +static void doAtomicMinMaxOpExpansion( + const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB, + MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB, + MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width, + const RISCVSubtarget *STI) { + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); + AtomicOrdering Ordering = + static_cast<AtomicOrdering>(MI.getOperand(4).getImm()); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - MachineFunction *MF = MBB.getParent(); - auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + // .loophead: + // lr.[w|d] dest, (addr) + // mv scratch, dest + // ifnochangeneeded scratch, incr, .looptail + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg) + .addReg(AddrReg); + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg) + .addReg(DestReg) + .addImm(0); + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Max: { + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE)) + .addReg(ScratchReg) + .addReg(IncrReg) + .addMBB(LoopTailMBB); + break; + } + case AtomicRMWInst::Min: { + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE)) + .addReg(IncrReg) + .addReg(ScratchReg) + .addMBB(LoopTailMBB); + break; + } + case AtomicRMWInst::UMax: + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU)) + .addReg(ScratchReg) + .addReg(IncrReg) + .addMBB(LoopTailMBB); + break; + case AtomicRMWInst::UMin: + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU)) + .addReg(IncrReg) + .addReg(ScratchReg) + .addMBB(LoopTailMBB); + break; + } - // Insert new MBBs. 
- MF->insert(++MBB.getIterator(), LoopHeadMBB); - MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); - MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); - MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + // .loopifbody: + // mv scratch, incr + BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg) + .addReg(IncrReg) + .addImm(0); - // Set up successors and transfer remaining instructions to DoneMBB. - LoopHeadMBB->addSuccessor(LoopIfBodyMBB); - LoopHeadMBB->addSuccessor(LoopTailMBB); - LoopIfBodyMBB->addSuccessor(LoopTailMBB); - LoopTailMBB->addSuccessor(LoopHeadMBB); - LoopTailMBB->addSuccessor(DoneMBB); - DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); - DoneMBB->transferSuccessors(&MBB); - MBB.addSuccessor(LoopHeadMBB); + // .looptail: + // sc.[w|d] scratch, scratch, (addr) + // bnez scratch, loop + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), + ScratchReg) + .addReg(ScratchReg) + .addReg(AddrReg); + BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) + .addReg(ScratchReg) + .addReg(RISCV::X0) + .addMBB(LoopHeadMBB); +} +static void doMaskedAtomicMinMaxOpExpansion( + const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB, + MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB, + MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width, + const RISCVSubtarget *STI) { + assert(Width == 32 && "Should never need to expand masked 64-bit operations"); Register DestReg = MI.getOperand(0).getReg(); Register Scratch1Reg = MI.getOperand(1).getReg(); Register Scratch2Reg = MI.getOperand(2).getReg(); @@ -541,6 +676,44 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( .addReg(Scratch1Reg) .addReg(RISCV::X0) .addMBB(LoopHeadMBB); +} + +bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI) { + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. + MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); + MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); + MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. 
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB); + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopIfBodyMBB->addSuccessor(LoopTailMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); + LoopTailMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); + + if (!IsMasked) + doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB, + LoopTailMBB, DoneMBB, BinOp, Width, STI); + else + doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, + LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp, + Width, STI); NextMBBI = MBB.end(); MI.eraseFromParent(); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 19992e6..3abbbb3 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -218,6 +218,7 @@ def HasStdExtZaamo : Predicate<"Subtarget->hasStdExtZaamo()">, AssemblerPredicate<(any_of FeatureStdExtZaamo), "'Zaamo' (Atomic Memory Operations)">; +def NoStdExtZaamo : Predicate<"!Subtarget->hasStdExtZaamo()">; def FeatureStdExtZalrsc : RISCVExtension<1, 0, "Load-Reserved/Store-Conditional">; @@ -1864,7 +1865,7 @@ def FeatureForcedAtomics : SubtargetFeature< "forced-atomics", "HasForcedAtomics", "true", "Assume that lock-free native-width atomics are available">; def HasAtomicLdSt - : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">; + : Predicate<"Subtarget->hasStdExtZalrsc() || Subtarget->hasForcedAtomics()">; def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", "AllowTaggedGlobals", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a77d765..26fe9ed 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -688,7 +688,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, else if (Subtarget.hasStdExtZicbop()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); - if (Subtarget.hasStdExtA()) { + if (Subtarget.hasStdExtZalrsc()) { setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) setMinCmpXchgSizeInBits(8); @@ -1558,7 +1558,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } } - if (Subtarget.hasStdExtA()) + if (Subtarget.hasStdExtZaamo()) setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand); if (Subtarget.hasForcedAtomics()) { @@ -21875,7 +21875,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( // result is then sign extended to XLEN. With +A, the minimum width is // 32 for both 64 and 32. assert(getMinCmpXchgSizeInBits() == 32); - assert(Subtarget.hasStdExtA()); + assert(Subtarget.hasStdExtZalrsc()); return Op.getValueSizeInBits() - 31; } break; @@ -24471,6 +24471,25 @@ ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; } +ISD::NodeType RISCVTargetLowering::getExtendForAtomicRMWArg(unsigned Op) const { + // Zaamo will use amo<op>.w which does not require extension. + if (Subtarget.hasStdExtZaamo() || Subtarget.hasForcedAtomics()) + return ISD::ANY_EXTEND; + + // Zalrsc pseudo expansions with comparison require sign-extension. 
+ assert(Subtarget.hasStdExtZalrsc()); + switch (Op) { + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return ISD::SIGN_EXTEND; + default: + break; + } + return ISD::ANY_EXTEND; +} + Register RISCVTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return RISCV::X10; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 3f81ed7..9e3e2a9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -245,6 +245,7 @@ public: } ISD::NodeType getExtendForAtomicCmpSwapArg() const override; + ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const override; bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 571d72f..5c81a09 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -158,9 +158,9 @@ class seq_cst_store<PatFrag base> } } // IsAtomic = 1 -// Atomic load/store are available under both +a and +force-atomics. -// Fences will be inserted for atomic load/stores according to the logic in -// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}. +// Atomic load/store are available under +zalrsc (thus also +a) and +// +force-atomics. Fences will be inserted for atomic load/stores according to +// the logic in RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}. // The normal loads/stores are relaxed (unordered) loads/stores that don't have // any ordering. This is necessary because AtomicExpandPass has added fences to // atomic load/stores and changed them to unordered ones. 
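The SIGN_EXTEND returned above for min/max under Zalrsc-only configurations matters because the expanded LR/SC loop compares full XLEN-wide registers with BGE/BGEU. A standalone illustration of the promotion hazard (plain C++, hypothetical names, not target code):

#include <cstdint>

// Signed 32-bit max as the XLEN=64 LR/SC loop would see it after promotion.
int64_t max_with_zext(int32_t a, int32_t b) {
  // Wrong promotion: zero-extension. -1 becomes 0xFFFFFFFF and beats any
  // small positive value under the signed 64-bit compare that BGE performs.
  int64_t pa = static_cast<uint32_t>(a);
  int64_t pb = static_cast<uint32_t>(b);
  return pa >= pb ? pa : pb;
}

int64_t max_with_sext(int32_t a, int32_t b) {
  // Correct promotion: sign-extension preserves i32 ordering under the
  // XLEN-wide signed compare.
  int64_t pa = a;
  int64_t pb = b;
  return pa >= pb ? pa : pb;
}

// max_with_zext(-1, 7) == 4294967295 (wrong); max_with_sext(-1, 7) == 7.

This is also why PromoteIntRes_Atomic1 above consults getExtendForAtomicRMWArg for the promoted operand instead of unconditionally using GetPromotedInteger.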
@@ -308,7 +308,65 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst> (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, timm:$ordering)>; -let Predicates = [HasStdExtA] in { +let Predicates = [HasStdExtZalrsc, NoStdExtZaamo] in { + +let Size = 16 in { +def PseudoAtomicSwap32 : PseudoAMO; +def PseudoAtomicLoadAdd32 : PseudoAMO; +def PseudoAtomicLoadSub32 : PseudoAMO; +def PseudoAtomicLoadAnd32 : PseudoAMO; +def PseudoAtomicLoadOr32 : PseudoAMO; +def PseudoAtomicLoadXor32 : PseudoAMO; +} // Size = 16 +let Size = 24 in { +def PseudoAtomicLoadMax32 : PseudoAMO; +def PseudoAtomicLoadMin32 : PseudoAMO; +def PseudoAtomicLoadUMax32 : PseudoAMO; +def PseudoAtomicLoadUMin32 : PseudoAMO; +} // Size = 24 + +defm : PseudoAMOPat<"atomic_swap_i32", PseudoAtomicSwap32>; +defm : PseudoAMOPat<"atomic_load_add_i32", PseudoAtomicLoadAdd32>; +defm : PseudoAMOPat<"atomic_load_sub_i32", PseudoAtomicLoadSub32>; +defm : PseudoAMOPat<"atomic_load_and_i32", PseudoAtomicLoadAnd32>; +defm : PseudoAMOPat<"atomic_load_or_i32", PseudoAtomicLoadOr32>; +defm : PseudoAMOPat<"atomic_load_xor_i32", PseudoAtomicLoadXor32>; +defm : PseudoAMOPat<"atomic_load_max_i32", PseudoAtomicLoadMax32>; +defm : PseudoAMOPat<"atomic_load_min_i32", PseudoAtomicLoadMin32>; +defm : PseudoAMOPat<"atomic_load_umax_i32", PseudoAtomicLoadUMax32>; +defm : PseudoAMOPat<"atomic_load_umin_i32", PseudoAtomicLoadUMin32>; +} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo] + +let Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64] in { + +let Size = 16 in { +def PseudoAtomicSwap64 : PseudoAMO; +def PseudoAtomicLoadAdd64 : PseudoAMO; +def PseudoAtomicLoadSub64 : PseudoAMO; +def PseudoAtomicLoadAnd64 : PseudoAMO; +def PseudoAtomicLoadOr64 : PseudoAMO; +def PseudoAtomicLoadXor64 : PseudoAMO; +} // Size = 16 +let Size = 24 in { +def PseudoAtomicLoadMax64 : PseudoAMO; +def PseudoAtomicLoadMin64 : PseudoAMO; +def PseudoAtomicLoadUMax64 : PseudoAMO; +def PseudoAtomicLoadUMin64 : PseudoAMO; +} // Size = 24 + +defm : PseudoAMOPat<"atomic_swap_i64", PseudoAtomicSwap64, i64>; +defm : PseudoAMOPat<"atomic_load_add_i64", PseudoAtomicLoadAdd64, i64>; +defm : PseudoAMOPat<"atomic_load_sub_i64", PseudoAtomicLoadSub64, i64>; +defm : PseudoAMOPat<"atomic_load_and_i64", PseudoAtomicLoadAnd64, i64>; +defm : PseudoAMOPat<"atomic_load_or_i64", PseudoAtomicLoadOr64, i64>; +defm : PseudoAMOPat<"atomic_load_xor_i64", PseudoAtomicLoadXor64, i64>; +defm : PseudoAMOPat<"atomic_load_max_i64", PseudoAtomicLoadMax64, i64>; +defm : PseudoAMOPat<"atomic_load_min_i64", PseudoAtomicLoadMin64, i64>; +defm : PseudoAMOPat<"atomic_load_umax_i64", PseudoAtomicLoadUMax64, i64>; +defm : PseudoAMOPat<"atomic_load_umin_i64", PseudoAtomicLoadUMin64, i64>; +} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64] + +let Predicates = [HasStdExtZalrsc] in { let Size = 20 in def PseudoAtomicLoadNand32 : PseudoAMO; @@ -347,14 +405,14 @@ def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax, PseudoMaskedAtomicLoadUMax32>; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin, PseudoMaskedAtomicLoadUMin32>; -} // Predicates = [HasStdExtA] +} // Predicates = [HasStdExtZalrsc] -let Predicates = [HasStdExtA, IsRV64] in { +let Predicates = [HasStdExtZalrsc, IsRV64] in { let Size = 20 in def PseudoAtomicLoadNand64 : PseudoAMO; defm : PseudoAMOPat<"atomic_load_nand_i64", PseudoAtomicLoadNand64, i64>; -} // Predicates = [HasStdExtA, IsRV64] +} // Predicates = [HasStdExtZalrsc, IsRV64] /// Compare and exchange @@ -385,17 +443,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo 
CmpXchgInst, (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; } -let Predicates = [HasStdExtA, NoStdExtZacas] in { +let Predicates = [HasStdExtZalrsc, NoStdExtZacas] in { def PseudoCmpXchg32 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>; } -let Predicates = [HasStdExtA, NoStdExtZacas, IsRV64] in { +let Predicates = [HasStdExtZalrsc, NoStdExtZacas, IsRV64] in { def PseudoCmpXchg64 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_i64", PseudoCmpXchg64, i64>; } -let Predicates = [HasStdExtA] in { +let Predicates = [HasStdExtZalrsc] in { def PseudoMaskedCmpXchg32 : Pseudo<(outs GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, @@ -412,4 +470,4 @@ def : Pat<(XLenVT (int_riscv_masked_cmpxchg (XLenVT GPR:$mask), (XLenVT timm:$ordering))), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; -} // Predicates = [HasStdExtA] +} // Predicates = [HasStdExtZalrsc] diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 5591d9f..021353a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -355,9 +355,9 @@ private: SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const; bool extractSubvector(Register &ResVReg, const SPIRVType *ResType, Register &ReadReg, MachineInstr &InsertionPoint) const; - bool generateImageRead(Register &ResVReg, const SPIRVType *ResType, - Register ImageReg, Register IdxReg, DebugLoc Loc, - MachineInstr &Pos) const; + bool generateImageReadOrFetch(Register &ResVReg, const SPIRVType *ResType, + Register ImageReg, Register IdxReg, + DebugLoc Loc, MachineInstr &Pos) const; bool BuildCOPY(Register DestReg, Register SrcReg, MachineInstr &I) const; bool loadVec3BuiltinInputID(SPIRV::BuiltIn::BuiltIn BuiltInValue, Register ResVReg, const SPIRVType *ResType, @@ -1321,8 +1321,8 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg, } Register IdxReg = IntPtrDef->getOperand(3).getReg(); - return generateImageRead(ResVReg, ResType, NewHandleReg, IdxReg, - I.getDebugLoc(), I); + return generateImageReadOrFetch(ResVReg, ResType, NewHandleReg, IdxReg, + I.getDebugLoc(), I); } } @@ -3639,27 +3639,33 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic( DebugLoc Loc = I.getDebugLoc(); MachineInstr &Pos = I; - return generateImageRead(ResVReg, ResType, NewImageReg, IdxReg, Loc, Pos); + return generateImageReadOrFetch(ResVReg, ResType, NewImageReg, IdxReg, Loc, + Pos); } -bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg, - const SPIRVType *ResType, - Register ImageReg, - Register IdxReg, DebugLoc Loc, - MachineInstr &Pos) const { +bool SPIRVInstructionSelector::generateImageReadOrFetch( + Register &ResVReg, const SPIRVType *ResType, Register ImageReg, + Register IdxReg, DebugLoc Loc, MachineInstr &Pos) const { SPIRVType *ImageType = GR.getSPIRVTypeForVReg(ImageReg); assert(ImageType && ImageType->getOpcode() == SPIRV::OpTypeImage && "ImageReg is not an image type."); + bool IsSignedInteger = sampledTypeIsSignedInteger(GR.getTypeForSPIRVType(ImageType)); + // Check if the "sampled" operand of the image type is 1. 
+ // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpImageFetch + auto SampledOp = ImageType->getOperand(6); + bool IsFetch = (SampledOp.getImm() == 1); uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType); if (ResultSize == 4) { - auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(ImageReg) - .addUse(IdxReg); + auto BMI = + BuildMI(*Pos.getParent(), Pos, Loc, + TII.get(IsFetch ? SPIRV::OpImageFetch : SPIRV::OpImageRead)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(ImageReg) + .addUse(IdxReg); if (IsSignedInteger) BMI.addImm(0x1000); // SignExtend @@ -3668,11 +3674,13 @@ bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg, SPIRVType *ReadType = widenTypeToVec4(ResType, Pos); Register ReadReg = MRI->createVirtualRegister(GR.getRegClass(ReadType)); - auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead)) - .addDef(ReadReg) - .addUse(GR.getSPIRVTypeID(ReadType)) - .addUse(ImageReg) - .addUse(IdxReg); + auto BMI = + BuildMI(*Pos.getParent(), Pos, Loc, + TII.get(IsFetch ? SPIRV::OpImageFetch : SPIRV::OpImageRead)) + .addDef(ReadReg) + .addUse(GR.getSPIRVTypeID(ReadType)) + .addUse(ImageReg) + .addUse(IdxReg); if (IsSignedInteger) BMI.addImm(0x1000); // SignExtend bool Succeed = BMI.constrainAllUses(TII, TRI, RBI); diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp index 7882045..0fce5b9 100644 --- a/llvm/lib/TargetParser/ARMTargetParser.cpp +++ b/llvm/lib/TargetParser/ARMTargetParser.cpp @@ -567,8 +567,8 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT) { default: if (TT.isOSNetBSD()) return "apcs-gnu"; - if (TT.isOSFreeBSD() || TT.isOSOpenBSD() || TT.isOSHaiku() || - TT.isOHOSFamily()) + if (TT.isOSFreeBSD() || TT.isOSFuchsia() || TT.isOSOpenBSD() || + TT.isOSHaiku() || TT.isOHOSFamily()) return "aapcs-linux"; return "aapcs"; } @@ -648,6 +648,8 @@ StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) { } case llvm::Triple::OpenBSD: return "cortex-a8"; + case llvm::Triple::Fuchsia: + return "cortex-a53"; default: switch (Triple.getEnvironment()) { case llvm::Triple::EABIHF: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 975498f..5aa8de3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3455,27 +3455,45 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { // select a, false, b -> select !a, b, false if (match(TrueVal, m_Specific(Zero))) { Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); - return SelectInst::Create(NotCond, FalseVal, Zero); + Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI; + SelectInst *NewSI = + SelectInst::Create(NotCond, FalseVal, Zero, "", nullptr, MDFrom); + NewSI->swapProfMetadata(); + return NewSI; } // select a, b, true -> select !a, true, b if (match(FalseVal, m_Specific(One))) { Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); - return SelectInst::Create(NotCond, One, TrueVal); + Instruction *MDFrom = ProfcheckDisableMetadataFixes ? 
nullptr : &SI; + SelectInst *NewSI = + SelectInst::Create(NotCond, One, TrueVal, "", nullptr, MDFrom); + NewSI->swapProfMetadata(); + return NewSI; } // DeMorgan in select form: !a && !b --> !(a || b) // select !a, !b, false --> not (select a, true, b) if (match(&SI, m_LogicalAnd(m_Not(m_Value(A)), m_Not(m_Value(B)))) && (CondVal->hasOneUse() || TrueVal->hasOneUse()) && - !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) - return BinaryOperator::CreateNot(Builder.CreateSelect(A, One, B)); + !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) { + Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI; + SelectInst *NewSI = + cast<SelectInst>(Builder.CreateSelect(A, One, B, "", MDFrom)); + NewSI->swapProfMetadata(); + return BinaryOperator::CreateNot(NewSI); + } // DeMorgan in select form: !a || !b --> !(a && b) // select !a, true, !b --> not (select a, b, false) if (match(&SI, m_LogicalOr(m_Not(m_Value(A)), m_Not(m_Value(B)))) && (CondVal->hasOneUse() || FalseVal->hasOneUse()) && - !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) - return BinaryOperator::CreateNot(Builder.CreateSelect(A, B, Zero)); + !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) { + Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI; + SelectInst *NewSI = + cast<SelectInst>(Builder.CreateSelect(A, B, Zero, "", MDFrom)); + NewSI->swapProfMetadata(); + return BinaryOperator::CreateNot(NewSI); + } // select (select a, true, b), true, b -> select a, true, b if (match(CondVal, m_Select(m_Value(A), m_One(), m_Value(B))) && diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp index 40720ae..29968b8 100644 --- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp +++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -131,7 +132,7 @@ cl::opt<uint64_t> ClFallbackToken( //===--- Statistics -------------------------------------------------------===// -STATISTIC(NumFunctionsInstrumented, "Functions instrumented"); +STATISTIC(NumFunctionsModified, "Functions modified"); STATISTIC(NumAllocationsInstrumented, "Allocations instrumented"); //===----------------------------------------------------------------------===// @@ -140,9 +141,19 @@ STATISTIC(NumAllocationsInstrumented, "Allocations instrumented"); /// /// Expected format is: !{<type-name>, <contains-pointer>} MDNode *getAllocTokenMetadata(const CallBase &CB) { - MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token); - if (!Ret) - return nullptr; + MDNode *Ret = nullptr; + if (auto *II = dyn_cast<IntrinsicInst>(&CB); + II && II->getIntrinsicID() == Intrinsic::alloc_token_id) { + auto *MDV = cast<MetadataAsValue>(II->getArgOperand(0)); + Ret = cast<MDNode>(MDV->getMetadata()); + // If the intrinsic has an empty MDNode, type inference failed. + if (Ret->getNumOperands() == 0) + return nullptr; + } else { + Ret = CB.getMetadata(LLVMContext::MD_alloc_token); + if (!Ret) + return nullptr; + } assert(Ret->getNumOperands() == 2 && "bad !alloc_token"); assert(isa<MDString>(Ret->getOperand(0))); assert(isa<ConstantAsMetadata>(Ret->getOperand(1))); @@ -315,6 +326,9 @@ private: FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID, LibFunc OriginalFunc); + /// Lower alloc_token_* intrinsics. 
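+  /// The call is replaced by its computed constant token ID and then erased.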
+ void replaceIntrinsicInst(IntrinsicInst *II, OptimizationRemarkEmitter &ORE); + /// Return the token ID from metadata in the call. uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) { return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode); @@ -336,21 +350,32 @@ bool AllocToken::instrumentFunction(Function &F) { // Do not apply any instrumentation for naked functions. if (F.hasFnAttribute(Attribute::Naked)) return false; - if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) - return false; // Don't touch available_externally functions, their actual body is elsewhere. if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; - // Only instrument functions that have the sanitize_alloc_token attribute. - if (!F.hasFnAttribute(Attribute::SanitizeAllocToken)) - return false; auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls; + SmallVector<IntrinsicInst *, 4> IntrinsicInsts; + + // Only instrument functions that have the sanitize_alloc_token attribute. + const bool InstrumentFunction = + F.hasFnAttribute(Attribute::SanitizeAllocToken) && + !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation); // Collect all allocation calls to avoid iterator invalidation. for (Instruction &I : instructions(F)) { + // Collect all alloc_token_* intrinsics. + if (auto *II = dyn_cast<IntrinsicInst>(&I); + II && II->getIntrinsicID() == Intrinsic::alloc_token_id) { + IntrinsicInsts.emplace_back(II); + continue; + } + + if (!InstrumentFunction) + continue; + auto *CB = dyn_cast<CallBase>(&I); if (!CB) continue; @@ -359,11 +384,21 @@ bool AllocToken::instrumentFunction(Function &F) { } bool Modified = false; - for (auto &[CB, Func] : AllocCalls) - Modified |= replaceAllocationCall(CB, Func, ORE, TLI); - if (Modified) - NumFunctionsInstrumented++; + if (!AllocCalls.empty()) { + for (auto &[CB, Func] : AllocCalls) + Modified |= replaceAllocationCall(CB, Func, ORE, TLI); + if (Modified) + NumFunctionsModified++; + } + + if (!IntrinsicInsts.empty()) { + for (auto *II : IntrinsicInsts) + replaceIntrinsicInst(II, ORE); + Modified = true; + NumFunctionsModified++; + } + return Modified; } @@ -381,7 +416,7 @@ AllocToken::shouldInstrumentCall(const CallBase &CB, if (TLI.getLibFunc(*Callee, Func)) { if (isInstrumentableLibFunc(Func, CB, TLI)) return Func; - } else if (Options.Extended && getAllocTokenMetadata(CB)) { + } else if (Options.Extended && CB.getMetadata(LLVMContext::MD_alloc_token)) { return NotLibFunc; } @@ -528,6 +563,16 @@ FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB, return TokenAlloc; } +void AllocToken::replaceIntrinsicInst(IntrinsicInst *II, + OptimizationRemarkEmitter &ORE) { + assert(II->getIntrinsicID() == Intrinsic::alloc_token_id); + + uint64_t TokenID = getToken(*II, ORE); + Value *V = ConstantInt::get(IntPtrTy, TokenID); + II->replaceAllUsesWith(V); + II->eraseFromParent(); +} + } // namespace AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index b4e4dc2..c95c887 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -217,32 +217,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { return Parent->getEnclosingBlockWithPredecessors(); } -bool VPBlockUtils::isHeader(const VPBlockBase *VPB, - const VPDominatorTree &VPDT) { - 
auto *VPBB = dyn_cast<VPBasicBlock>(VPB); - if (!VPBB) - return false; - - // If VPBB is in a region R, VPBB is a loop header if R is a loop region with - // VPBB as its entry, i.e., free of predecessors. - if (auto *R = VPBB->getParent()) - return !R->isReplicator() && !VPBB->hasPredecessors(); - - // A header dominates its second predecessor (the latch), with the other - // predecessor being the preheader - return VPB->getPredecessors().size() == 2 && - VPDT.dominates(VPB, VPB->getPredecessors()[1]); -} - -bool VPBlockUtils::isLatch(const VPBlockBase *VPB, - const VPDominatorTree &VPDT) { - // A latch has a header as its second successor, with its other successor - // leaving the loop. A preheader OTOH has a header as its first (and only) - // successor. - return VPB->getNumSuccessors() == 2 && - VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT); -} - VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { iterator It = begin(); while (It != end() && It->isPhi()) @@ -768,8 +742,12 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) { VPRegionBlock *VPRegionBlock::clone() { const auto &[NewEntry, NewExiting] = cloneFrom(getEntry()); - auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting, - getName(), isReplicator()); + VPlan &Plan = *getPlan(); + VPRegionBlock *NewRegion = + isReplicator() + ? Plan.createReplicateRegion(NewEntry, NewExiting, getName()) + : Plan.createLoopRegion(getName(), NewEntry, NewExiting); + for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry)) Block->setParent(NewRegion); return NewRegion; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8274431..167ba55 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4450,22 +4450,24 @@ public: return VPB; } - /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p - /// IsReplicator is true, the region is a replicate region. The returned block - /// is owned by the VPlan and deleted once the VPlan is destroyed. - VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, - const std::string &Name = "", - bool IsReplicator = false) { - auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator); + /// Create a new loop region with \p Name and entry and exiting blocks set + /// to \p Entry and \p Exiting respectively, if set. The returned block is + /// owned by the VPlan and deleted once the VPlan is destroyed. + VPRegionBlock *createLoopRegion(const std::string &Name = "", + VPBlockBase *Entry = nullptr, + VPBlockBase *Exiting = nullptr) { + auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name) + : new VPRegionBlock(Name); CreatedBlocks.push_back(VPB); return VPB; } - /// Create a new loop VPRegionBlock with \p Name and entry and exiting blocks set - /// to nullptr. The returned block is owned by the VPlan and deleted once the - /// VPlan is destroyed. - VPRegionBlock *createVPRegionBlock(const std::string &Name = "") { - auto *VPB = new VPRegionBlock(Name); + /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The + /// returned block is owned by the VPlan and deleted once the VPlan is + /// destroyed. 
+ VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, + const std::string &Name = "") { + auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true); CreatedBlocks.push_back(VPB); return VPB; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 332791a..65688a3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -406,7 +406,7 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { // LatchExitVPB, taking care to preserve the original predecessor & successor // order of blocks. Set region entry and exiting after both HeaderVPB and // LatchVPBB have been disconnected from their predecessors/successors. - auto *R = Plan.createVPRegionBlock(); + auto *R = Plan.createLoopRegion(); VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R); VPBlockUtils::disconnectBlocks(LatchVPBB, R); VPBlockUtils::connectBlocks(PreheaderVPBB, R); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 7bf8d83..ff25ef5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -372,7 +372,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, auto *Exiting = Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); VPRegionBlock *Region = - Plan.createVPRegionBlock(Entry, Exiting, RegionName, true); + Plan.createReplicateRegion(Entry, Exiting, RegionName); // Note: first set Entry as region entry and then connect successors starting // from it in order, to propagate the "parent" of each VPBasicBlock. diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 10801c0..32e4b88 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -8,6 +8,7 @@ #include "VPlanUtils.h" #include "VPlanCFG.h" +#include "VPlanDominatorTree.h" #include "VPlanPatternMatch.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -253,3 +254,29 @@ vputils::getRecipesForUncountableExit(VPlan &Plan, return UncountableCondition; } + +bool VPBlockUtils::isHeader(const VPBlockBase *VPB, + const VPDominatorTree &VPDT) { + auto *VPBB = dyn_cast<VPBasicBlock>(VPB); + if (!VPBB) + return false; + + // If VPBB is in a region R, VPBB is a loop header if R is a loop region with + // VPBB as its entry, i.e., free of predecessors. + if (auto *R = VPBB->getParent()) + return !R->isReplicator() && !VPBB->hasPredecessors(); + + // A header dominates its second predecessor (the latch), with the other + // predecessor being the preheader + return VPB->getPredecessors().size() == 2 && + VPDT.dominates(VPB, VPB->getPredecessors()[1]); +} + +bool VPBlockUtils::isLatch(const VPBlockBase *VPB, + const VPDominatorTree &VPDT) { + // A latch has a header as its second successor, with its other successor + // leaving the loop. A preheader OTOH has a header as its first (and only) + // successor. 
+ return VPB->getNumSuccessors() == 2 && + VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT); +} diff --git a/llvm/test/CodeGen/AMDGPU/add-max.ll b/llvm/test/CodeGen/AMDGPU/add-max.ll index 00c6656..b3a7057 100644 --- a/llvm/test/CodeGen/AMDGPU/add-max.ll +++ b/llvm/test/CodeGen/AMDGPU/add-max.ll @@ -5,7 +5,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_max_u32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 %c) @@ -16,7 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) { define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) { ; GCN-LABEL: add_max_u32_svv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1 +; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 %c) @@ -27,7 +27,7 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) { define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) { ; SDAG-LABEL: add_max_u32_ssv: ; SDAG: ; %bb.0: -; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0 +; SDAG-NEXT: v_add_max_u32 v0, s0, s1, v0 ; SDAG-NEXT: ; return to shader part epilog ; ; GISEL-LABEL: add_max_u32_ssv: @@ -59,7 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) { ; GCN-LABEL: add_max_u32_vsi: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4 +; GCN-NEXT: v_add_max_u32 v0, v0, s0, 4 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 4) @@ -70,7 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) { define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) { ; GCN-LABEL: add_max_u32_svl: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64 +; GCN-NEXT: v_add_max_u32 v0, s0, v0, 0x64 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umax.i32(i32 %add, i32 100) @@ -81,7 +81,7 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) { define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) { ; SDAG-LABEL: add_max_u32_slv: ; SDAG: ; %bb.0: -; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0 +; SDAG-NEXT: v_add_max_u32 v0, 0x64, s0, v0 ; SDAG-NEXT: ; return to shader part epilog ; ; GISEL-LABEL: add_max_u32_slv: @@ -99,7 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) { define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_max_i32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.smax.i32(i32 %add, i32 %c) @@ -110,7 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) { define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: add_min_u32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.umin.i32(i32 %add, i32 %c) @@ -121,7 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) { define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: 
add_min_i32_vvv: ; GCN: ; %bb.0: -; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2 +; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2 ; GCN-NEXT: ; return to shader part epilog %add = add i32 %a, %b %max = call i32 @llvm.smin.i32(i32 %add, i32 %c) diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 7ee0015f..711d57b 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -39137,7 +39137,7 @@ define bfloat @v_sitofp_i64_to_bf16(i64 %x) { ; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v2 ; GFX1250-NEXT: v_add_nc_u32_e32 v2, 32, v2 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-NEXT: v_add_min_u32_e64 v2, v3, -1, v2 +; GFX1250-NEXT: v_add_min_u32 v2, v3, -1, v2 ; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v2, v[0:1] ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0 @@ -39487,8 +39487,8 @@ define <2 x bfloat> @v_sitofp_v2i64_to_v2bf16(<2 x i64> %x) { ; GFX1250-NEXT: v_dual_ashrrev_i32 v5, 31, v5 :: v_dual_ashrrev_i32 v4, 31, v4 ; GFX1250-NEXT: v_dual_add_nc_u32 v5, 32, v5 :: v_dual_add_nc_u32 v4, 32, v4 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_add_min_u32_e64 v5, v7, -1, v5 -; GFX1250-NEXT: v_add_min_u32_e64 v4, v6, -1, v4 +; GFX1250-NEXT: v_add_min_u32 v5, v7, -1, v5 +; GFX1250-NEXT: v_add_min_u32 v4, v6, -1, v4 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v5, v[0:1] ; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v4, v[2:3] @@ -39979,9 +39979,9 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250TRUE16-NEXT: v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v8, 31, v8 -; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v7, v10, -1, v7 +; GFX1250TRUE16-NEXT: v_add_min_u32 v7, v10, -1, v7 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v6, v9, -1, v6 +; GFX1250TRUE16-NEXT: v_add_min_u32 v6, v9, -1, v6 ; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3] ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5] @@ -39991,7 +39991,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4 ; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v8, v11, -1, v8 +; GFX1250TRUE16-NEXT: v_add_min_u32 v8, v11, -1, v8 ; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54 ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2 @@ -40027,8 +40027,8 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7 ; GFX1250FAKE16-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v6, v10, -1, v6 -; GFX1250FAKE16-NEXT: 
v_add_min_u32_e64 v7, v11, -1, v7 +; GFX1250FAKE16-NEXT: v_add_min_u32 v6, v10, -1, v6 +; GFX1250FAKE16-NEXT: v_add_min_u32 v7, v11, -1, v7 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3] ; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1] @@ -40038,7 +40038,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) { ; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250FAKE16-NEXT: v_or_b32_e32 v2, v3, v2 -; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v8, v9, -1, v8 +; GFX1250FAKE16-NEXT: v_add_min_u32 v8, v9, -1, v8 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54 ; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2 @@ -40656,18 +40656,18 @@ define <4 x bfloat> @v_sitofp_v4i64_to_v4bf16(<4 x i64> %x) { ; GFX1250-NEXT: v_dual_add_nc_u32 v9, 32, v9 :: v_dual_add_nc_u32 v8, 32, v8 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_dual_ashrrev_i32 v10, 31, v10 :: v_dual_bitop2_b32 v11, v0, v1 bitop3:0x14 -; GFX1250-NEXT: v_add_min_u32_e64 v9, v13, -1, v9 +; GFX1250-NEXT: v_add_min_u32 v9, v13, -1, v9 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_add_min_u32_e64 v8, v12, -1, v8 +; GFX1250-NEXT: v_add_min_u32 v8, v12, -1, v8 ; GFX1250-NEXT: v_dual_ashrrev_i32 v11, 31, v11 :: v_dual_add_nc_u32 v10, 32, v10 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v9, v[4:5] ; GFX1250-NEXT: v_lshlrev_b64_e32 v[6:7], v8, v[6:7] ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1250-NEXT: v_add_nc_u32_e32 v11, 32, v11 -; GFX1250-NEXT: v_add_min_u32_e64 v10, v14, -1, v10 +; GFX1250-NEXT: v_add_min_u32 v10, v14, -1, v10 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_add_min_u32_e64 v11, v15, -1, v11 +; GFX1250-NEXT: v_add_min_u32 v11, v15, -1, v11 ; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v10, v[2:3] ; GFX1250-NEXT: v_min_u32_e32 v6, 1, v6 ; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4 diff --git a/llvm/test/CodeGen/ARM/llround-conv.ll b/llvm/test/CodeGen/ARM/llround-conv.ll index f734db8..20fe272 100644 --- a/llvm/test/CodeGen/ARM/llround-conv.ll +++ b/llvm/test/CodeGen/ARM/llround-conv.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8 ; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 define i64 @testmsxh_builtin(half %x) { @@ -22,6 +23,14 @@ define i64 @testmsxh_builtin(half %x) { ; CHECK-NOFP16-NEXT: bl llroundf ; CHECK-NOFP16-NEXT: pop {r11, pc} ; +; CHECK-FPv8-LABEL: testmsxh_builtin: +; CHECK-FPv8: @ %bb.0: @ %entry +; CHECK-FPv8-NEXT: .save {r11, lr} +; CHECK-FPv8-NEXT: push {r11, lr} 
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FPv8-NEXT: bl llroundf +; CHECK-FPv8-NEXT: pop {r11, pc} +; ; CHECK-FP16-LABEL: testmsxh_builtin: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r11, lr} diff --git a/llvm/test/CodeGen/ARM/lround-conv.ll b/llvm/test/CodeGen/ARM/lround-conv.ll index 03f7a0d..7466bcb 100644 --- a/llvm/test/CodeGen/ARM/lround-conv.ll +++ b/llvm/test/CodeGen/ARM/lround-conv.ll @@ -4,11 +4,39 @@ ; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8 ; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 -;define i32 @testmswh_builtin(half %x) { -;entry: -; %0 = tail call i32 @llvm.lround.i32.f16(half %x) -; ret i32 %0 -;} +define i32 @testmswh_builtin(half %x) { +; CHECK-SOFT-LABEL: testmswh_builtin: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: .save {r11, lr} +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: bl __aeabi_h2f +; CHECK-SOFT-NEXT: pop {r11, lr} +; CHECK-SOFT-NEXT: b lroundf +; +; CHECK-NOFP16-LABEL: testmswh_builtin: +; CHECK-NOFP16: @ %bb.0: @ %entry +; CHECK-NOFP16-NEXT: .save {r11, lr} +; CHECK-NOFP16-NEXT: push {r11, lr} +; CHECK-NOFP16-NEXT: vmov r0, s0 +; CHECK-NOFP16-NEXT: bl __aeabi_h2f +; CHECK-NOFP16-NEXT: vmov s0, r0 +; CHECK-NOFP16-NEXT: pop {r11, lr} +; CHECK-NOFP16-NEXT: b lroundf +; +; CHECK-FPv8-LABEL: testmswh_builtin: +; CHECK-FPv8: @ %bb.0: @ %entry +; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FPv8-NEXT: b lroundf +; +; CHECK-FP16-LABEL: testmswh_builtin: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: vcvta.s32.f16 s0, s0 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: bx lr +entry: + %0 = tail call i32 @llvm.lround.i32.f16(half %x) + ret i32 %0 +} define i32 @testmsws_builtin(float %x) { ; CHECK-LABEL: testmsws_builtin: @@ -40,8 +68,3 @@ entry: ret i32 %0 } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK-FP16: {{.*}} -; CHECK-FPv8: {{.*}} -; CHECK-NOFP16: {{.*}} -; CHECK-SOFT: {{.*}} diff --git a/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll new file mode 100644 index 0000000..9190d03 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -dxil-prepare < %s | FileCheck %s + +; Ensures that dxil-prepare will remove the llvm.errno.tbaa metadata + +target triple = "dxil-unknown-shadermodel6.0-compute" + +define void @main() { +entry: + ret void +} + +; CHECK-NOT: !llvm.errno.tbaa +; CHECK-NOT: {{^!}} + +!llvm.errno.tbaa = !{!0} + +!0 = !{!1, !1, i64 0} +!1 = !{!"omnipotent char", !2} +!2 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll new file mode 100644 index 0000000..f7e5cdb --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll @@ -0,0 +1,33 @@ +; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s +; CHECK-DAG: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = combine(##.LCPI0_0,#-1) +; CHECK-DAG: [[VREG1:v([0-9]+)]] = vmem(r[[REGH]]+#0) +; CHECK-DAG: [[REG1:(r[0-9]+)]] = memw(r{{[0-9]+}}+#4) +; CHECK-DAG: [[VREG2:v([0-9]+)]] = vsplat([[REG1]]) +; CHECK-DAG: [[REG2:(r[0-9]+)]] = memw(r{{[0-9]+}}+#0) +; CHECK-DAG: [[VREG3:v([0-9]+)]] = vsplat([[REG2]]) +; CHECK-DAG: [[VREG4:v([0-9]+)]] = vand([[VREG2]],[[VREG1]]) +; CHECK-DAG: [[VREG5:v([0-9]+)]] = vand([[VREG3]],[[VREG1]]) +; CHECK-DAG: [[QREG:q[0-9]+]] = vand([[VREG4]],r{{[0-9]+}}) +; CHECK-DAG: [[VREG6:v([0-9]+)]] = vand([[QREG]],r{{[0-9]+}}) +; CHECK-DAG: [[QREG1:q[0-9]+]] = vand([[VREG5]],r{{[0-9]+}}) +; CHECK-DAG: [[VREG7:v([0-9]+)]] = vand([[QREG1]],r{{[0-9]+}}) +; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK-DAG: [[VREG8:v([0-9]+)]] = vror(v{{[0-9]+}},r{{[0-9]+}}) +; CHECK-DAG: [[VREG9:v([0-9]+)]] = vor([[VREG8]],v{{[0-9]+}}) +; CHECK-DAG: q{{[0-9]+}} = vand([[VREG9]],r{{[0-9]+}}) +define void @bitcast_i64_to_v64i1_full(ptr %in, ptr %out) { +entry: + %load = load i64, ptr %in, align 4 + %bitcast = bitcast i64 %load to <64 x i1> + %e0 = extractelement <64 x i1> %bitcast, i32 0 + %e1 = extractelement <64 x i1> %bitcast, i32 1 + %z0 = zext i1 %e0 to i8 + %z1 = zext i1 %e1 to i8 + %ptr0 = getelementptr i8, ptr %out, i32 0 + %ptr1 = getelementptr i8, ptr %out, i32 1 + store i8 %z0, ptr %ptr0, align 1 + store i8 %z1, ptr %ptr1, align 1 + ret void +} + diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json new file mode 100644 index 0000000..5de715b --- /dev/null +++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json @@ -0,0 +1,22 @@ +{ + "entities": { + "KILL": [0.1, 0.2, 0.3], + "MOV": [0.4, 0.5, 0.6], + "LEA": [0.7, 0.8, 0.9], + "RET": [1.0, 1.1, 1.2], + "ADD": [1.3, 1.4, 1.5], + "SUB": [1.6, 1.7, 1.8], + "IMUL": [1.9, 2.0, 2.1], + "AND": [2.2, 2.3, 2.4], + "OR": [2.5, 2.6, 2.7], + "XOR": [2.8, 2.9, 3.0], + "CMP": [3.1, 3.2, 3.3], + "TEST": [3.4, 3.5, 3.6], + "JMP": [3.7, 3.8, 3.9], + "CALL": [4.0, 4.1, 4.2], + "PUSH": [4.3, 4.4, 4.5], + "POP": [4.6, 4.7, 4.8], + "NOP": [4.9, 5.0, 5.1], + "COPY": [5.2, 5.3, 5.4] + } +}
\ No newline at end of file diff --git a/llvm/test/CodeGen/MIR2Vec/if-else.mir b/llvm/test/CodeGen/MIR2Vec/if-else.mir new file mode 100644 index 0000000..5734a23 --- /dev/null +++ b/llvm/test/CodeGen/MIR2Vec/if-else.mir @@ -0,0 +1,144 @@ +# REQUIRES: x86-registered-target +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=none -print-mir2vec -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_3D_vocab.json %s -o /dev/null 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + + define dso_local i32 @abc(i32 noundef %a, i32 noundef %b) { + entry: + %retval = alloca i32, align 4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4 + store i32 %b, ptr %b.addr, align 4 + %0 = load i32, ptr %a.addr, align 4 + %1 = load i32, ptr %b.addr, align 4 + %cmp = icmp sgt i32 %0, %1 + br i1 %cmp, label %if.then, label %if.else + + if.then: ; preds = %entry + %2 = load i32, ptr %b.addr, align 4 + store i32 %2, ptr %retval, align 4 + br label %return + + if.else: ; preds = %entry + %3 = load i32, ptr %a.addr, align 4 + store i32 %3, ptr %retval, align 4 + br label %return + + return: ; preds = %if.else, %if.then + %4 = load i32, ptr %retval, align 4 + ret i32 %4 + } +... +--- +name: abc +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHContTarget: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: true +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: gr32, preferred-register: '', flags: [ ] } + - { id: 1, class: gr32, preferred-register: '', flags: [ ] } + - { id: 2, class: gr32, preferred-register: '', flags: [ ] } + - { id: 3, class: gr32, preferred-register: '', flags: [ ] } + - { id: 4, class: gr32, preferred-register: '', flags: [ ] } + - { id: 5, class: gr32, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$edi', virtual-reg: '%0' } + - { reg: '$esi', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 +fixedStack: [] +stack: + - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: a.addr, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: b.addr, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + amxProgModel: None +body: 
| + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $edi, $esi + + %1:gr32 = COPY $esi + %0:gr32 = COPY $edi + MOV32mr %stack.1.a.addr, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.a.addr) + MOV32mr %stack.2.b.addr, 1, $noreg, 0, $noreg, %1 :: (store (s32) into %ir.b.addr) + %2:gr32 = SUB32rr %0, %1, implicit-def $eflags + JCC_1 %bb.2, 14, implicit $eflags + JMP_1 %bb.1 + + bb.1.if.then: + successors: %bb.3(0x80000000) + + %4:gr32 = MOV32rm %stack.2.b.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.b.addr) + MOV32mr %stack.0.retval, 1, $noreg, 0, $noreg, killed %4 :: (store (s32) into %ir.retval) + JMP_1 %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + + %3:gr32 = MOV32rm %stack.1.a.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.a.addr) + MOV32mr %stack.0.retval, 1, $noreg, 0, $noreg, killed %3 :: (store (s32) into %ir.retval) + + bb.3.return: + %5:gr32 = MOV32rm %stack.0.retval, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.retval) + $eax = COPY %5 + RET 0, $eax +... + +# CHECK: Machine basic block vectors: +# CHECK-NEXT: Machine basic block: abc:entry: +# CHECK-NEXT: [ 16.50 17.10 17.70 ] +# CHECK-NEXT: Machine basic block: abc:if.then: +# CHECK-NEXT: [ 4.50 4.80 5.10 ] +# CHECK-NEXT: Machine basic block: abc:if.else: +# CHECK-NEXT: [ 0.80 1.00 1.20 ] +# CHECK-NEXT: Machine basic block: abc:return: +# CHECK-NEXT: [ 6.60 6.90 7.20 ]
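The block vectors checked above follow from summing the dummy vocabulary entries over each block's base opcodes (COPY, MOV, SUB, JMP, RET), with opcodes that have no vocabulary entry (here JCC) contributing nothing. This is a reading of the test data rather than a claim about the implementation:

  entry:   2*COPY + 2*MOV + SUB + JMP
             = 2*[5.2 5.3 5.4] + 2*[0.4 0.5 0.6] + [1.6 1.7 1.8] + [3.7 3.8 3.9]
             = [16.50 17.10 17.70]
  if.then: MOV + MOV + JMP  = [4.50 4.80 5.10]
  if.else: MOV + MOV        = [0.80 1.00 1.20]
  return:  MOV + COPY + RET = [6.60 6.90 7.20]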
\ No newline at end of file diff --git a/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir b/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir new file mode 100644 index 0000000..338cb63 --- /dev/null +++ b/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir @@ -0,0 +1,76 @@ +# REQUIRES: x86-registered-target +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=none -print-mir2vec -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_3D_vocab.json %s -o /dev/null 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + + define dso_local noundef i32 @add_function(i32 noundef %a, i32 noundef %b) { + entry: + %sum = add nsw i32 %a, %b + %result = mul nsw i32 %sum, 2 + ret i32 %result + } + + define dso_local void @simple_function() { + entry: + ret void + } +... +--- +name: add_function +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr32 } +liveins: + - { reg: '$edi', virtual-reg: '%0' } + - { reg: '$esi', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $edi, $esi + + %1:gr32 = COPY $esi + %0:gr32 = COPY $edi + %2:gr32 = nsw ADD32rr %0, %1, implicit-def dead $eflags + %3:gr32 = ADD32rr %2, %2, implicit-def dead $eflags + $eax = COPY %3 + RET 0, $eax + +--- +name: simple_function +alignment: 16 +tracksRegLiveness: true +body: | + bb.0.entry: + RET 0 + +# CHECK: MIR2Vec embeddings for machine function add_function: +# CHECK: Function vector: [ 19.20 19.80 20.40 ] +# CHECK-NEXT: Machine basic block vectors: +# CHECK-NEXT: Machine basic block: add_function:entry: +# CHECK-NEXT: [ 19.20 19.80 20.40 ] +# CHECK-NEXT: Machine instruction vectors: +# CHECK-NEXT: Machine instruction: %1:gr32 = COPY $esi +# CHECK-NEXT: [ 5.20 5.30 5.40 ] +# CHECK-NEXT: Machine instruction: %0:gr32 = COPY $edi +# CHECK-NEXT: [ 5.20 5.30 5.40 ] +# CHECK-NEXT: Machine instruction: %2:gr32 = nsw ADD32rr %0:gr32(tied-def 0), %1:gr32, implicit-def dead $eflags +# CHECK-NEXT: [ 1.30 1.40 1.50 ] +# CHECK-NEXT: Machine instruction: %3:gr32 = ADD32rr %2:gr32(tied-def 0), %2:gr32, implicit-def dead $eflags +# CHECK-NEXT: [ 1.30 1.40 1.50 ] +# CHECK-NEXT: Machine instruction: $eax = COPY %3:gr32 +# CHECK-NEXT: [ 5.20 5.30 5.40 ] +# CHECK-NEXT: Machine instruction: RET 0, $eax +# CHECK-NEXT: [ 1.00 1.10 1.20 ] + +# CHECK: MIR2Vec embeddings for machine function simple_function: +# CHECK-NEXT:Function vector: [ 1.00 1.10 1.20 ] +# CHECK-NEXT: Machine basic block vectors: +# CHECK-NEXT: Machine basic block: simple_function:entry: +# CHECK-NEXT: [ 1.00 1.10 1.20 ] +# CHECK-NEXT: Machine instruction vectors: +# CHECK-NEXT: Machine instruction: RET 0 +# CHECK-NEXT: [ 1.00 1.10 1.20 ]
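The same arithmetic reproduces the add_function vector. A minimal standalone sketch, not the in-tree MIR2Vec implementation; it assumes plain base-opcode lookup and unweighted summation, which is what the dummy-vocabulary CHECK lines imply:

#include <array>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Dummy 3-D vocabulary entries used by bb.0.entry of add_function.
  std::map<std::string, std::array<double, 3>> Vocab = {
      {"COPY", {5.2, 5.3, 5.4}},
      {"ADD", {1.3, 1.4, 1.5}},
      {"RET", {1.0, 1.1, 1.2}}};
  // Base opcodes of: COPY, COPY, ADD32rr, ADD32rr, COPY $eax, RET.
  std::vector<std::string> Opcodes = {"COPY", "COPY", "ADD",
                                      "ADD",  "COPY", "RET"};
  std::array<double, 3> Sum = {0, 0, 0};
  for (const std::string &Op : Opcodes)
    for (int I = 0; I < 3; ++I)
      Sum[I] += Vocab[Op][I];
  // Prints [ 19.20 19.80 20.40 ], matching the Function vector CHECK above.
  std::printf("[ %.2f %.2f %.2f ]\n", Sum[0], Sum[1], Sum[2]);
  return 0;
}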
\ No newline at end of file diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll index 80b4048..c6554bc 100644 --- a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll +++ b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll @@ -1,8 +1,8 @@ -; REQUIRES: x86_64-linux -; RUN: llc -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID -; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM -; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES -; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS +; REQUIRES: x86-registered-target +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS define dso_local void @test() { entry: diff --git a/llvm/test/CodeGen/RISCV/atomic-fence.ll b/llvm/test/CodeGen/RISCV/atomic-fence.ll index 7103345..77148f6 100644 --- a/llvm/test/CodeGen/RISCV/atomic-fence.ll +++ b/llvm/test/CodeGen/RISCV/atomic-fence.ll @@ -1,12 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck --check-prefixes=CHECK,WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll index 7e3abc7..c6234de 100644 --- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll +++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll @@ -1,12 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | 
FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ @@ -44,6 +48,11 @@ define i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i8_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lb a0, 0(a0) @@ -59,6 +68,11 @@ define i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i8_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) @@ -78,6 +92,11 @@ define i8 @atomic_load_i8_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lb a0, 0(a0) @@ -93,6 +112,11 @@ define i8 @atomic_load_i8_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) @@ -112,6 +136,12 @@ define i8 @atomic_load_i8_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i8_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lb a0, 0(a0) @@ -133,6 +163,12 @@ define i8 @atomic_load_i8_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i8_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lb a0, 0(a0) @@ -200,6 +236,13 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw @@ -223,6 +266,13 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: 
lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -286,6 +336,11 @@ define i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i16_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lh a0, 0(a0) @@ -301,6 +356,11 @@ define i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i16_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) @@ -320,6 +380,11 @@ define i16 @atomic_load_i16_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i16_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lh a0, 0(a0) @@ -335,6 +400,11 @@ define i16 @atomic_load_i16_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) @@ -354,6 +424,12 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i16_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lh a0, 0(a0) @@ -375,6 +451,12 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i16_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lh a0, 0(a0) @@ -442,6 +524,13 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw @@ -465,6 +554,13 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -528,6 +624,11 @@ define i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: 
atomic_load_i32_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lw a0, 0(a0) @@ -543,6 +644,11 @@ define i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i32_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) @@ -562,6 +668,11 @@ define i32 @atomic_load_i32_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lw a0, 0(a0) @@ -577,6 +688,11 @@ define i32 @atomic_load_i32_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) @@ -596,6 +712,12 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lw a0, 0(a0) @@ -617,6 +739,12 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lw a0, 0(a0) @@ -684,6 +812,13 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: fence r, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_load_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw @@ -707,6 +842,13 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -770,6 +912,16 @@ define i64 @atomic_load_i64_unordered(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 0 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -790,6 +942,11 @@ define i64 @atomic_load_i64_unordered(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; 
RV64I-ZALRSC-LABEL: atomic_load_i64_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i64_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: ld a0, 0(a0) @@ -809,6 +966,16 @@ define i64 @atomic_load_i64_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 0 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -829,6 +996,11 @@ define i64 @atomic_load_i64_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_load_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: ld a0, 0(a0) @@ -848,6 +1020,16 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 2 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -868,6 +1050,12 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: ld a0, 0(a0) @@ -914,6 +1102,16 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 5 +; RV32I-ZALRSC-NEXT: call __atomic_load_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_load_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -934,6 +1132,13 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_load_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ld a0, 0(a0) +; RV64I-ZALRSC-NEXT: fence r, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw @@ -979,6 +1184,11 @@ define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i8_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sb a1, 
0(a0) @@ -994,6 +1204,11 @@ define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i8_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sb a1, 0(a0) @@ -1013,6 +1228,11 @@ define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i8_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sb a1, 0(a0) @@ -1028,6 +1248,11 @@ define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sb a1, 0(a0) @@ -1047,6 +1272,12 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i8_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1068,6 +1299,12 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i8_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1135,6 +1372,13 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sb a1, 0(a0) +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i8_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1157,6 +1401,13 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sb a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i8_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1219,6 +1470,11 @@ define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i16_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sh a1, 0(a0) @@ -1234,6 +1490,11 @@ define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i16_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sh a1, 0(a0) @@ 
-1253,6 +1514,11 @@ define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i16_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sh a1, 0(a0) @@ -1268,6 +1534,11 @@ define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sh a1, 0(a0) @@ -1287,6 +1558,12 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i16_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1308,6 +1585,12 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i16_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1375,6 +1658,13 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sh a1, 0(a0) +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i16_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1397,6 +1687,13 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sh a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i16_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1459,6 +1756,11 @@ define void @atomic_store_i32_unordered(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i32_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: sw a1, 0(a0) @@ -1474,6 +1776,11 @@ define void @atomic_store_i32_unordered(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i32_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sw a1, 0(a0) @@ -1493,6 +1800,11 @@ define void @atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i32_monotonic: ; RV32IA: # %bb.0: ; 
RV32IA-NEXT: sw a1, 0(a0) @@ -1508,6 +1820,11 @@ define void @atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sw a1, 0(a0) @@ -1527,6 +1844,12 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1548,6 +1871,12 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1615,6 +1944,13 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: fence rw, w +; RV32I-ZALRSC-NEXT: sw a1, 0(a0) +; RV32I-ZALRSC-NEXT: fence rw, rw +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomic_store_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, w @@ -1637,6 +1973,13 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sw a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1699,6 +2042,16 @@ define void @atomic_store_i64_unordered(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_unordered: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1719,6 +2072,11 @@ define void @atomic_store_i64_unordered(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i64_unordered: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sd a1, 0(a0) @@ -1738,6 +2096,16 @@ define void @atomic_store_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; 
RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1758,6 +2126,11 @@ define void @atomic_store_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomic_store_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: sd a1, 0(a0) @@ -1777,6 +2150,16 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1797,6 +2180,12 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w @@ -1843,6 +2232,16 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_store_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_store_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomic_store_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -1863,6 +2262,13 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomic_store_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: fence rw, w +; RV64I-ZALRSC-NEXT: sd a1, 0(a0) +; RV64I-ZALRSC-NEXT: fence rw, rw +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomic_store_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, w diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll index 4dafd6a..d5238ab 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll @@ -3,10 +3,14 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { ; RV32I-LABEL: atomicrmw_sub_i32_constant: 
@@ -26,6 +30,18 @@ define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { ; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_constant: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a1, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB0_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_constant: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -42,6 +58,18 @@ define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind { ; RV64IA-NEXT: li a1, -1 ; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_constant: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB0_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i32 1 seq_cst ret i32 %1 } @@ -71,6 +99,18 @@ define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_constant: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a1, 1 +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: li a2, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i64_constant: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -87,6 +127,18 @@ define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind { ; RV64IA-NEXT: li a1, -1 ; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_constant: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB1_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i64 1 seq_cst ret i64 %1 } @@ -109,6 +161,18 @@ define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind { ; RV32IA-NEXT: amoadd.w.aqrl a0, a2, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_neg: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: sub a2, a1, a2 +; RV32I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a1, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB2_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_neg: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -125,6 +189,18 @@ define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind { ; RV64IA-NEXT: sub a2, a2, a1 ; RV64IA-NEXT: amoadd.w.aqrl a0, a2, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_neg: +; RV64I-ZALRSC: # %bb.0: +; 
RV64I-ZALRSC-NEXT: subw a2, a1, a2 +; RV64I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB2_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %b = sub i32 %x, %y %1 = atomicrmw sub ptr %a, i32 %b seq_cst ret i32 %1 @@ -159,6 +235,20 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_neg: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sltu a5, a1, a3 +; RV32I-ZALRSC-NEXT: sub a2, a2, a4 +; RV32I-ZALRSC-NEXT: sub a2, a2, a5 +; RV32I-ZALRSC-NEXT: sub a1, a1, a3 +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i64_neg: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -175,6 +265,18 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { ; RV64IA-NEXT: sub a2, a2, a1 ; RV64IA-NEXT: amoadd.d.aqrl a0, a2, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_neg: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sub a2, a1, a2 +; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB3_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %b = sub i64 %x, %y %1 = atomicrmw sub ptr %a, i64 %b seq_cst ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 1213256..26feb83 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -1,12 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS,RV32IA-WMO,RV32IA-WMO-NOZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS,RV32IA-TSO,RV32IA-TSO-NOZACAS %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS,RV64IA-WMO,RV64IA-WMO-NOZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ @@ -50,6 +54,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB0_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -80,6 +104,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB0_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -174,6 +218,26 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB1_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -224,6 +288,26 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB1_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -378,6 +462,26 @@ define i8 
@atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB2_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -428,6 +532,26 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB2_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -582,6 +706,26 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -632,6 +776,26 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and 
a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -786,6 +950,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -816,6 +1000,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -914,6 +1118,22 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB5_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -936,6 +1156,22 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: 
.LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB5_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -1004,6 +1240,22 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1037,6 +1289,22 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1138,6 +1406,22 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB7_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1171,6 +1455,22 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB7_1 +; 
RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1272,6 +1572,22 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1305,6 +1621,22 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1406,6 +1738,22 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1439,6 +1787,22 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; 
RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1540,6 +1904,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB10_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -1561,6 +1940,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB10_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -1630,6 +2024,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB11_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1661,6 +2070,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB11_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1760,6 +2184,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release: +; RV32I-ZALRSC: # %bb.0: +; 
RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB12_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1791,6 +2230,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB12_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1890,6 +2344,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB13_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -1921,6 +2390,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB13_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -2020,6 +2504,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a2, 255 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) 
+; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB14_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -2051,6 +2550,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a2, 255 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB14_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -2149,6 +2663,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -2179,6 +2713,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -2273,6 +2827,26 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; 
RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB16_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2323,6 +2897,26 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB16_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2477,6 +3071,26 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB17_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2527,6 +3141,26 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB17_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi 
a2, a0, -4 @@ -2681,6 +3315,26 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2731,6 +3385,26 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -2885,6 +3559,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB19_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -2915,6 +3609,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB19_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3009,6 +3723,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB20_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3039,6 +3773,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB20_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3137,6 +3891,26 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB21_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3187,6 +3961,26 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_acquire: +; 
RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB21_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3345,6 +4139,26 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB22_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3395,6 +4209,26 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB22_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3553,6 +4387,26 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; 
RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3603,6 +4457,26 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -3761,6 +4635,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3791,6 +4685,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3889,6 +4803,25 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; 
RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB25_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3913,6 +4846,25 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB25_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -3989,6 +4941,25 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB26_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4027,6 +4998,25 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB26_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4145,6 +5135,25 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; 
RV32I-ZALRSC-LABEL: atomicrmw_and_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB27_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4183,6 +5192,25 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB27_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4301,6 +5329,25 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB28_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4339,6 +5386,25 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB28_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; 
RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4457,6 +5523,25 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB29_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4495,6 +5580,25 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB29_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4613,6 +5717,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB30_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -4644,6 +5769,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; 
RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB30_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -4865,6 +6011,27 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB31_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -4917,6 +6084,27 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB31_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5201,6 +6389,27 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB32_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; 
RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5253,6 +6462,27 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB32_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5537,6 +6767,27 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB33_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5589,6 +6840,27 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB33_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -5873,6 +7145,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 
+; RV32I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB34_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
 ; RV32IA-NOZACAS: # %bb.0:
 ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -5904,6 +7197,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB34_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
 ; RV64IA-NOZACAS: # %bb.0:
 ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6129,6 +7443,21 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB35_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
 ; RV32IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic:
 ; RV32IA-NOZACAS: # %bb.0:
 ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6149,6 +7478,21 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB35_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
 ; RV64IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic:
 ; RV64IA-NOZACAS: # %bb.0:
 ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6213,6 +7557,21 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB36_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6243,6 +7602,21 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB36_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6337,6 +7711,21 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB37_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6367,6 +7756,21 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB37_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6461,6 +7865,21 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB38_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; 
RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6491,6 +7910,21 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB38_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6585,6 +8019,21 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB39_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6615,6 +8064,21 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB39_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6709,6 +8173,21 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB40_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -6729,6 +8208,21 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV64I-ZALRSC: 
# %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB40_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -6793,6 +8287,21 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB41_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6823,6 +8332,21 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB41_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6917,6 +8441,21 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB42_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -6947,6 +8486,21 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, 
a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB42_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7041,6 +8595,21 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB43_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7071,6 +8640,21 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB43_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7165,6 +8749,21 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB44_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7195,6 +8794,21 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB44_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7321,6 +8935,35 @@ define 
i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB45_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB45_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -7392,6 +9035,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB45_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB45_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -7545,6 +9217,35 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, 
.LBB46_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB46_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7645,6 +9346,35 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB46_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB46_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7885,6 +9615,35 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB47_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -7985,6 +9744,35 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: 
andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB47_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -8225,6 +10013,35 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB48_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB48_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -8325,6 +10142,35 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB48_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 
+; RV64I-ZALRSC-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB48_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -8565,6 +10411,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB49_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -8636,6 +10511,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB49_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -8789,6 +10693,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll 
a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB50_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB50_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -8860,6 +10793,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB50_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB50_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -9013,6 +10975,35 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB51_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB51_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; 
RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9113,6 +11104,35 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB51_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB51_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9353,6 +11373,35 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB52_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB52_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9453,6 +11502,35 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; 
RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB52_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB52_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9693,6 +11771,35 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB53_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB53_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -9793,6 +11900,35 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB53_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB53_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10033,6 
+12169,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB54_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB54_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10104,6 +12269,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB54_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB54_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10255,6 +12449,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB55_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; 
RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB55_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10319,6 +12537,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB55_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB55_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -10455,6 +12697,30 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB56_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB56_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10543,6 +12809,30 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB56_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; 
RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB56_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10751,6 +13041,30 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB57_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB57_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -10839,6 +13153,30 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB57_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB57_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11047,6 +13385,30 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB58_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB58_1 
Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB58_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11135,6 +13497,30 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB58_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB58_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11343,6 +13729,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB59_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB59_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11407,6 +13817,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB59_3 +; 
RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB59_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11543,6 +13977,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB60_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB60_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11607,6 +14065,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB60_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB60_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -11743,6 +14225,30 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, 
a6, .LBB61_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB61_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -11831,6 +14337,30 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB61_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB61_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12039,6 +14569,30 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB62_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB62_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12127,6 +14681,30 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: 
mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB62_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB62_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12335,6 +14913,30 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB63_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB63_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12423,6 +15025,30 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB63_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB63_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12631,6 +15257,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; 
RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB64_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB64_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12695,6 +15345,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB64_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB64_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12801,6 +15475,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB65_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12832,6 +15527,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB65_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -12929,6 +15645,27 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB66_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -12981,6 +15718,27 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB66_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13141,6 +15899,27 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB67_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13193,6 +15972,27 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 
; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB67_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13353,6 +16153,27 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB68_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13405,6 +16226,27 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB68_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -13565,6 +16407,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: 
lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB69_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -13596,6 +16459,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB69_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -13697,6 +16581,23 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB70_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -13720,6 +16621,23 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB70_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -13791,6 +16709,23 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui 
a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB71_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -13826,6 +16761,23 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB71_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -13933,6 +16885,23 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB72_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -13968,6 +16937,23 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB72_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14075,6 +17061,23 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; 
RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB73_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14110,6 +17113,23 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB73_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14217,6 +17237,23 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: and a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB74_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14252,6 +17289,23 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: and a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB74_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14360,6 +17414,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: 
lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB75_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -14383,6 +17453,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB75_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 @@ -14456,6 +17542,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: or a4, a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB76_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14490,6 +17592,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: or a4, a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB76_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 @@ -14596,6 +17714,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a1, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; 
RV32I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB77_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14630,6 +17764,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB77_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14736,6 +17886,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB78_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14770,6 +17936,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB78_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14876,6 +18058,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB79_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14910,6 +18108,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB79_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -15014,6 +18228,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB80_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15045,6 +18280,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB80_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15142,6 +18398,27 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB81_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15194,6 +18471,27 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB81_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15354,6 +18652,27 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB82_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15406,6 +18725,27 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB82_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15566,6 +18906,27 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB83_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15618,6 +18979,27 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB83_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15778,6 +19160,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB84_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15809,6 +19212,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB84_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15906,6 +19330,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB85_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15937,6 +19382,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB85_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16038,6 +19504,27 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB86_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16090,6 +19577,27 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB86_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16254,6 +19762,27 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB87_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16306,6 +19835,27 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB87_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16470,6 +20020,27 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB88_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16522,6 +20093,27 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB88_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16686,6 +20278,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB89_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16717,6 +20330,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB89_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16818,6 +20452,26 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB90_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16843,6 +20497,26 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB90_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16922,6 +20596,26 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB91_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16962,6 +20656,26 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB91_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17086,6 +20800,26 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB92_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17126,6 +20860,26 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB92_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17250,6 +21004,26 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB93_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17290,6 +21064,26 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB93_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17414,6 +21208,26 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB94_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17454,6 +21268,26 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB94_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17578,6 +21412,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB95_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -17610,6 +21466,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB95_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -17838,6 +21716,28 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB96_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17892,6 +21792,28 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB96_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18186,6 +22108,28 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB97_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18240,6 +22184,28 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB97_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18534,6 +22500,28 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB98_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18588,6 +22576,28 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB98_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18882,6 +22892,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB99_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -18914,6 +22946,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB99_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19146,6 +23200,22 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB100_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19167,6 +23237,22 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB100_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19234,6 +23320,22 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB101_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19266,6 +23368,22 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB101_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19366,6 +23484,22 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB102_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19398,6 +23532,22 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB102_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19498,6 +23648,22 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB103_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19530,6 +23696,22 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB103_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19630,6 +23812,22 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB104_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19662,6 +23860,22 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB104_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19762,6 +23976,22 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB105_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19783,6 +24013,22 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB105_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19850,6 +24096,22 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB106_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19882,6 +24144,22 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB106_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19982,6 +24260,22 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB107_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20014,6 +24308,22 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB107_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20114,6 +24424,22 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB108_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20146,6 +24472,22 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB108_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20246,6 +24588,22 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB109_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20278,6 +24636,22 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB109_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20410,6 +24784,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB110_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB110_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -20483,6 +24888,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB110_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB110_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -20642,6 +25078,37 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB111_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB111_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20746,6 +25213,37 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB111_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB111_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20998,6 +25496,37 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB112_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB112_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -21102,6 +25631,37 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi
a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB112_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB112_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21354,6 +25914,37 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB113_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB113_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21458,6 +26049,37 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB113_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: 
xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB113_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -21710,6 +26332,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB114_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB114_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -21783,6 +26436,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB114_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB114_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -21942,6 +26626,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: 
andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB115_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB115_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -22015,6 +26730,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB115_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB115_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -22174,6 +26920,37 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; 
RV32I-ZALRSC-NEXT: bge a1, a7, .LBB116_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB116_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22278,6 +27055,37 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB116_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB116_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22530,6 +27338,37 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB117_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB117_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22634,6 +27473,37 @@ define 
i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB117_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB117_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22886,6 +27756,37 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB118_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB118_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -22990,6 +27891,37 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: 
.LBB118_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB118_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB118_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -23242,6 +28174,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB119_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB119_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23315,6 +28278,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB119_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB119_1 +; RV64I-ZALRSC-NEXT: # 
%bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23476,6 +28470,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB120_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23545,6 +28564,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB120_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -23688,6 +28732,31 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; 
RV32I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB121_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -23782,6 +28851,31 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB121_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24000,6 +29094,31 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB122_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB122_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24094,6 +29213,31 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, 
a1, .LBB122_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB122_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24312,6 +29456,31 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB123_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24406,6 +29575,31 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB123_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -24624,6 +29818,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; 
RV32I-ZALRSC-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB124_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -24693,6 +29912,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB124_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -24836,6 +30080,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB125_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -24905,6 +30174,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; 
RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB125_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -25048,6 +30342,31 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB126_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25142,6 +30461,31 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB126_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25360,6 +30704,31 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, 
sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB127_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25454,6 +30823,31 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB127_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25672,6 +31066,31 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB128_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: 
atomicrmw_umin_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25766,6 +31185,31 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB128_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 @@ -25984,6 +31428,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB129_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB129_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -26053,6 +31522,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB129_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 +; 
RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB129_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 @@ -26162,6 +31656,17 @@ define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB130_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoswap.w a0, a1, (a0) @@ -26177,6 +31682,17 @@ define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB130_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xchg_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.w a0, a1, (a0) @@ -26196,6 +31712,17 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB131_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.aq a0, a1, (a0) @@ -26216,6 +31743,17 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB131_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.aq a0, a1, (a0) @@ -26240,6 +31778,17 @@ define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB132_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.rl a0, a1, (a0) @@ -26260,6 +31809,17 @@ define i32 
@atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB132_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.rl a0, a1, (a0) @@ -26284,6 +31844,17 @@ define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB133_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26304,6 +31875,17 @@ define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB133_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26328,6 +31910,17 @@ define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB134_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26348,6 +31941,17 @@ define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB134_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0) @@ -26372,6 +31976,17 @@ define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB135_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; 
RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB135_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoadd.w a0, a1, (a0) @@ -26387,6 +32002,17 @@ define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB135_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB135_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_add_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.w a0, a1, (a0) @@ -26406,6 +32032,17 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB136_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.aq a0, a1, (a0) @@ -26426,6 +32063,17 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB136_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.aq a0, a1, (a0) @@ -26450,6 +32098,17 @@ define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB137_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.rl a0, a1, (a0) @@ -26470,6 +32129,17 @@ define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB137_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.rl a0, a1, (a0) @@ -26494,6 
+32164,17 @@ define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB138_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26514,6 +32195,17 @@ define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB138_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26538,6 +32230,17 @@ define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB139_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_add_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26558,6 +32261,17 @@ define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB139_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0) @@ -26582,6 +32296,17 @@ define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB140_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: neg a1, a1 @@ -26598,6 +32323,17 @@ define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) 
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB140_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_sub_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: neg a1, a1 @@ -26618,6 +32354,17 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB141_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26640,6 +32387,17 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB141_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26666,6 +32424,17 @@ define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB142_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26688,6 +32457,17 @@ define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB142_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26714,6 +32494,17 @@ define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB143_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB143_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26736,6 +32527,17 @@ define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) 
nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB143_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB143_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26762,6 +32564,17 @@ define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB144_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_sub_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: neg a1, a1 @@ -26784,6 +32597,17 @@ define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB144_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -26810,6 +32634,17 @@ define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB145_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoand.w a0, a1, (a0) @@ -26825,6 +32660,17 @@ define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB145_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_and_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.w a0, a1, (a0) @@ -26844,6 +32690,17 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB146_1 +; 
RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.aq a0, a1, (a0) @@ -26864,6 +32721,17 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB146_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.aq a0, a1, (a0) @@ -26888,6 +32756,17 @@ define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB147_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.rl a0, a1, (a0) @@ -26908,6 +32787,17 @@ define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB147_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.rl a0, a1, (a0) @@ -26932,6 +32822,17 @@ define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB148_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -26952,6 +32853,17 @@ define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB148_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -26976,6 +32888,17 @@ define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { ; 
RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB149_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_and_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -26996,6 +32919,17 @@ define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB149_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0) @@ -27020,6 +32954,18 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB150_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 @@ -27042,6 +32988,18 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB150_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 @@ -27200,6 +33158,18 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB151_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 @@ -27234,6 +33204,18 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: 
atomicrmw_nand_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB151_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1 @@ -27432,6 +33414,18 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB152_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_release: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 @@ -27466,6 +33460,18 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB152_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 @@ -27664,6 +33670,18 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB153_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: ; RV32IA-WMO-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 @@ -27698,6 +33716,18 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB153_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 @@ -27896,6 +33926,18 @@ define i32 
@atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB154_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst: ; RV32IA-NOZACAS: # %bb.0: ; RV32IA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 @@ -27918,6 +33960,18 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB154_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1 @@ -28112,6 +34166,17 @@ define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB155_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB155_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoor.w a0, a1, (a0) @@ -28127,6 +34192,17 @@ define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB155_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB155_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_or_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.w a0, a1, (a0) @@ -28146,6 +34222,17 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB156_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.aq a0, a1, (a0) @@ -28166,6 +34253,17 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: 
.LBB156_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB156_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.aq a0, a1, (a0) @@ -28190,6 +34288,17 @@ define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB157_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.rl a0, a1, (a0) @@ -28210,6 +34319,17 @@ define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB157_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.rl a0, a1, (a0) @@ -28234,6 +34354,17 @@ define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB158_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB158_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_or_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28254,6 +34385,17 @@ define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB158_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB158_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28278,6 +34420,17 @@ define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB159_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB159_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: 
atomicrmw_or_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28298,6 +34451,17 @@ define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB159_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB159_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0) @@ -28322,6 +34486,17 @@ define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB160_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB160_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amoxor.w a0, a1, (a0) @@ -28337,6 +34512,17 @@ define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB160_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB160_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xor_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.w a0, a1, (a0) @@ -28356,6 +34542,17 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB161_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB161_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.aq a0, a1, (a0) @@ -28376,6 +34573,17 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB161_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB161_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.aq a0, a1, (a0) @@ -28400,6 +34608,17 @@ define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: 
.LBB162_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB162_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.rl a0, a1, (a0) @@ -28420,6 +34639,17 @@ define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB162_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB162_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.rl a0, a1, (a0) @@ -28444,6 +34674,17 @@ define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB163_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB163_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28464,6 +34705,17 @@ define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB163_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB163_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28488,6 +34740,17 @@ define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB164_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB164_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_xor_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28508,6 +34771,17 @@ define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB164_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB164_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret 
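All of the ZALRSC check blocks in this test share one retry-loop shape; only the lr/sc ordering suffixes vary with the IR memory ordering: monotonic pairs lr.w with sc.w, acquire uses lr.w.aq, release uses sc.w.rl, acq_rel combines lr.w.aq with sc.w.rl, and seq_cst pairs lr.w.aqrl with sc.w.rl. As a rough sketch, the xor checks above correspond to IR of the following form; the RUN lines sit outside this hunk, but the RV32I-ZALRSC/RV64I-ZALRSC prefixes presumably come from llc invocations with -mattr=+zalrsc:

define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind {
  ; With only Zalrsc (no Zaamo) there is no amoxor, so this expands to
  ; the lr.w.aqrl / xor / sc.w.rl / bnez retry loop checked above.
  %1 = atomicrmw xor ptr %a, i32 %b seq_cst
  ret i32 %1
}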
+; ; RV64IA-WMO-LABEL: atomicrmw_xor_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0) @@ -28558,6 +34832,21 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB165_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB165_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB165_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB165_3: # in Loop: Header=BB165_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB165_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amomax.w a0, a1, (a0) @@ -28602,6 +34891,22 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB165_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB165_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB165_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB165_3: # in Loop: Header=BB165_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB165_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_max_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.w a0, a1, (a0) @@ -28647,6 +34952,21 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB166_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB166_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB166_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB166_3: # in Loop: Header=BB166_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB166_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.aq a0, a1, (a0) @@ -28696,6 +35016,22 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB166_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB166_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB166_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB166_3: # in Loop: Header=BB166_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB166_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: 
amomax.w.aq a0, a1, (a0) @@ -28746,6 +35082,21 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB167_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB167_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB167_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB167_3: # in Loop: Header=BB167_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB167_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.rl a0, a1, (a0) @@ -28795,6 +35146,22 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB167_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB167_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB167_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB167_3: # in Loop: Header=BB167_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB167_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.rl a0, a1, (a0) @@ -28845,6 +35212,21 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB168_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB168_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB168_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB168_3: # in Loop: Header=BB168_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB168_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -28894,6 +35276,22 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB168_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB168_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB168_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB168_3: # in Loop: Header=BB168_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB168_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -28944,6 +35342,21 @@ define i32 
@atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB169_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB169_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB169_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB169_3: # in Loop: Header=BB169_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB169_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_max_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -28993,6 +35406,22 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB169_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB169_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB169_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB169_3: # in Loop: Header=BB169_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB169_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0) @@ -29043,6 +35472,21 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB170_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB170_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB170_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB170_3: # in Loop: Header=BB170_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB170_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amomin.w a0, a1, (a0) @@ -29087,6 +35531,22 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB170_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB170_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB170_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB170_3: # in Loop: Header=BB170_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB170_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_min_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.w a0, a1, (a0) @@ -29132,6 +35592,21 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; 
RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB171_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB171_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB171_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB171_3: # in Loop: Header=BB171_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB171_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.aq a0, a1, (a0) @@ -29181,6 +35656,22 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB171_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB171_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB171_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB171_3: # in Loop: Header=BB171_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB171_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.aq a0, a1, (a0) @@ -29231,6 +35722,21 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB172_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB172_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB172_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB172_3: # in Loop: Header=BB172_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB172_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.rl a0, a1, (a0) @@ -29280,6 +35786,22 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB172_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB172_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB172_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB172_3: # in Loop: Header=BB172_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB172_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.rl a0, a1, (a0) @@ -29330,6 +35852,21 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_acq_rel: +; 
RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB173_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB173_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB173_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB173_3: # in Loop: Header=BB173_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB173_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29379,6 +35916,22 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB173_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB173_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB173_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB173_3: # in Loop: Header=BB173_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB173_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29429,6 +35982,21 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB174_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB174_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB174_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB174_3: # in Loop: Header=BB174_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB174_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_min_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29478,6 +36046,22 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB174_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB174_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB174_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB174_3: # in Loop: Header=BB174_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB174_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0) @@ -29528,6 +36112,21 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: 
.LBB175_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB175_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB175_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB175_3: # in Loop: Header=BB175_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB175_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amomaxu.w a0, a1, (a0) @@ -29572,6 +36171,22 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB175_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB175_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB175_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB175_3: # in Loop: Header=BB175_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB175_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umax_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.w a0, a1, (a0) @@ -29617,6 +36232,21 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB176_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB176_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB176_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB176_3: # in Loop: Header=BB176_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB176_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.aq a0, a1, (a0) @@ -29666,6 +36296,22 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB176_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB176_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB176_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB176_3: # in Loop: Header=BB176_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB176_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.aq a0, a1, (a0) @@ -29716,6 +36362,21 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB177_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; 
RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB177_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB177_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB177_3: # in Loop: Header=BB177_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB177_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.rl a0, a1, (a0) @@ -29765,6 +36426,22 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB177_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB177_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB177_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB177_3: # in Loop: Header=BB177_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB177_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.rl a0, a1, (a0) @@ -29815,6 +36492,21 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB178_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB178_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB178_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB178_3: # in Loop: Header=BB178_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB178_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -29864,6 +36556,22 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB178_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB178_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB178_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB178_3: # in Loop: Header=BB178_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB178_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -29914,6 +36622,21 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB179_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; 
RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB179_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB179_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB179_3: # in Loop: Header=BB179_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB179_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umax_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -29963,6 +36686,22 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB179_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB179_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB179_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB179_3: # in Loop: Header=BB179_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB179_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0) @@ -30013,6 +36752,21 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB180_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB180_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB180_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB180_3: # in Loop: Header=BB180_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB180_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i32_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: amominu.w a0, a1, (a0) @@ -30057,6 +36811,22 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB180_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB180_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB180_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB180_3: # in Loop: Header=BB180_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB180_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umin_i32_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.w a0, a1, (a0) @@ -30102,6 +36872,21 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB181_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB181_3 +; RV32I-ZALRSC-NEXT: # 
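; Across these check blocks the IR memory order maps onto LR/SC suffixes:
; monotonic -> lr.w / sc.w, acquire -> lr.w.aq / sc.w, release -> lr.w /
; sc.w.rl, acq_rel -> lr.w.aq / sc.w.rl, seq_cst -> lr.w.aqrl / sc.w.rl,
; exactly as the generated lines above show. A hypothetical sketch of two
; orderings side by side (names assumed, not part of the test file):
define i32 @umin_orders_sketch(ptr %a, i32 %b) nounwind {
  %1 = atomicrmw umin ptr %a, i32 %b acquire
  %2 = atomicrmw umin ptr %a, i32 %b seq_cst
  ret i32 %2
}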
%bb.2: # in Loop: Header=BB181_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB181_3: # in Loop: Header=BB181_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB181_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.aq a0, a1, (a0) @@ -30151,6 +36936,22 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB181_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB181_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB181_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB181_3: # in Loop: Header=BB181_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB181_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.aq a0, a1, (a0) @@ -30201,6 +37002,21 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB182_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB182_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB182_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB182_3: # in Loop: Header=BB182_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB182_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_release: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.rl a0, a1, (a0) @@ -30250,6 +37066,22 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB182_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB182_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB182_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB182_3: # in Loop: Header=BB182_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB182_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.rl a0, a1, (a0) @@ -30300,6 +37132,21 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB183_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB183_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB183_1 Depth=1 +; 
RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB183_3: # in Loop: Header=BB183_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB183_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acq_rel: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30349,6 +37196,22 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB183_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB183_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB183_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB183_3: # in Loop: Header=BB183_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB183_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30399,6 +37262,21 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB184_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB184_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB184_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB184_3: # in Loop: Header=BB184_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB184_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-WMO-LABEL: atomicrmw_umin_i32_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30448,6 +37326,22 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB184_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB184_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB184_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB184_3: # in Loop: Header=BB184_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB184_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0) @@ -30472,6 +37366,16 @@ define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; 
RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30492,6 +37396,17 @@ define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB185_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB185_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xchg_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.d a0, a1, (a0) @@ -30511,6 +37426,16 @@ define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30531,6 +37456,17 @@ define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB186_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB186_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.aq a0, a1, (a0) @@ -30555,6 +37491,16 @@ define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30575,6 +37521,17 @@ define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB187_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB187_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.rl a0, a1, (a0) @@ -30599,6 +37556,16 @@ define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; 
RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30619,6 +37586,17 @@ define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB188_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB188_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.aqrl a0, a1, (a0) @@ -30643,6 +37621,16 @@ define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xchg_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30663,6 +37651,17 @@ define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB189_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB189_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoswap.d.aqrl a0, a1, (a0) @@ -30687,6 +37686,16 @@ define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30707,6 +37716,17 @@ define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB190_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB190_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_add_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.d a0, a1, (a0) @@ -30726,6 +37746,16 @@ 
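; On RV32, Zalrsc only provides word-sized lr.w/sc.w, so the i64 atomicrmw
; forms still lower to libatomic calls (__atomic_exchange_8,
; __atomic_fetch_add_8, and so on); the li a3 immediate in these blocks is
; the memory-order argument (0 relaxed, 2 acquire, 3 release, 4 acq_rel,
; 5 seq_cst, matching the __ATOMIC_* encoding). Assumed IR sketch behind
; checks of this shape:
define i64 @xchg64_sketch(ptr %a, i64 %b) nounwind {
  %old = atomicrmw xchg ptr %a, i64 %b acquire
  ret i64 %old
}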
define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30746,6 +37776,17 @@ define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB191_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB191_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.aq a0, a1, (a0) @@ -30770,6 +37811,16 @@ define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30790,6 +37841,17 @@ define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB192_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB192_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.rl a0, a1, (a0) @@ -30814,6 +37876,16 @@ define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30834,6 +37906,17 @@ define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB193_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB193_1 +; 
RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.aqrl a0, a1, (a0) @@ -30858,6 +37941,16 @@ define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_add_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30878,6 +37971,17 @@ define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB194_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB194_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_add_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoadd.d.aqrl a0, a1, (a0) @@ -30902,6 +38006,16 @@ define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30922,6 +38036,17 @@ define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB195_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB195_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_sub_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: neg a1, a1 @@ -30942,6 +38067,16 @@ define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -30962,6 +38097,17 @@ define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; 
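; On RV64 the i64 forms reuse the word-sized loop shape at doubleword width:
; lr.d/sc.d with the same .aq/.rl ordering suffixes, and the binop (add, sub,
; and, or, xor) computed between the load-reserved and the store-conditional.
; Assumed IR sketch matching the visible define lines:
define i64 @add64_sketch(ptr %a, i64 %b) nounwind {
  %old = atomicrmw add ptr %a, i64 %b seq_cst
  ret i64 %old
}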
RV64I-ZALRSC-NEXT: .LBB196_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB196_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -30988,6 +38134,16 @@ define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31008,6 +38164,17 @@ define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB197_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB197_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -31034,6 +38201,16 @@ define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31054,6 +38231,17 @@ define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB198_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB198_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -31080,6 +38268,16 @@ define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_sub_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31100,6 +38298,17 @@ define 
i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB199_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB199_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_sub_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: neg a1, a1 @@ -31126,6 +38335,16 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31146,6 +38365,17 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB200_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB200_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_and_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.d a0, a1, (a0) @@ -31165,6 +38395,16 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31185,6 +38425,17 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB201_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB201_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.aq a0, a1, (a0) @@ -31209,6 +38460,16 @@ define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 
4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31229,6 +38490,17 @@ define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB202_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB202_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.rl a0, a1, (a0) @@ -31253,6 +38525,16 @@ define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31273,6 +38555,17 @@ define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB203_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB203_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.aqrl a0, a1, (a0) @@ -31297,6 +38590,16 @@ define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_and_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31317,6 +38620,17 @@ define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB204_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB204_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_and_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoand.d.aqrl a0, a1, (a0) @@ -31341,6 +38655,16 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_nand_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31361,6 +38685,18 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB205_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB205_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i64_monotonic: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB205_1: # =>This Inner Loop Header: Depth=1 @@ -31453,6 +38789,16 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31473,6 +38819,18 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB206_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1 @@ -31591,6 +38949,16 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31611,6 +38979,18 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: 
sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB207_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_release: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1 @@ -31729,6 +39109,16 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31749,6 +39139,18 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB208_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: ; RV64IA-WMO-NOZACAS-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1 @@ -31867,6 +39269,16 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_nand_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -31887,6 +39299,18 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB209_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB209_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i64_seq_cst: ; RV64IA-NOZACAS: # %bb.0: ; RV64IA-NOZACAS-NEXT: .LBB209_1: # =>This Inner Loop Header: Depth=1 @@ -31997,6 +39421,16 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: 
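; nand is the one binop with no AMO form even on full 'A' targets, so every
; configuration expands to a loop; under Zalrsc the update step is simply
; and followed by not between lr.d and sc.d, as the blocks above show.
; Assumed IR sketch:
define i64 @nand64_sketch(ptr %a, i64 %b) nounwind {
  %old = atomicrmw nand ptr %a, i64 %b acq_rel
  ret i64 %old
}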
atomicrmw_or_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32017,6 +39451,17 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB210_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB210_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_or_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.d a0, a1, (a0) @@ -32036,6 +39481,16 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32056,6 +39511,17 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB211_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB211_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.d.aq a0, a1, (a0) @@ -32080,6 +39546,16 @@ define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32100,6 +39576,17 @@ define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB212_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB212_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.d.rl a0, a1, (a0) @@ -32124,6 +39611,16 @@ define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32144,6 +39641,17 @@ define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB213_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB213_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.d.aqrl a0, a1, (a0) @@ -32168,6 +39676,16 @@ define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_or_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32188,6 +39706,17 @@ define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB214_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB214_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_or_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoor.d.aqrl a0, a1, (a0) @@ -32212,6 +39741,16 @@ define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32232,6 +39771,17 @@ define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB215_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB215_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_xor_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.d a0, a1, (a0) @@ -32251,6 +39801,16 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; 
RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 2 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32271,6 +39831,17 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB216_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB216_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.d.aq a0, a1, (a0) @@ -32295,6 +39866,16 @@ define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 3 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32315,6 +39896,17 @@ define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB217_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB217_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.d.rl a0, a1, (a0) @@ -32339,6 +39931,16 @@ define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 4 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32359,6 +39961,17 @@ define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB218_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB218_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; 
RV64IA-WMO-LABEL: atomicrmw_xor_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.d.aqrl a0, a1, (a0) @@ -32383,6 +39996,16 @@ define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 5 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_xor_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 @@ -32403,6 +40026,17 @@ define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB219_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB219_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_xor_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amoxor.d.aqrl a0, a1, (a0) @@ -32471,6 +40105,60 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB220_2 +; RV32I-ZALRSC-NEXT: .LBB220_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB220_7 +; RV32I-ZALRSC-NEXT: .LBB220_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB220_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB220_5 +; RV32I-ZALRSC-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB220_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB220_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB220_1 +; RV32I-ZALRSC-NEXT: .LBB220_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 
4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -32561,6 +40249,21 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB220_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB220_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB220_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB220_3: # in Loop: Header=BB220_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB220_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_max_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.d a0, a1, (a0) @@ -32624,6 +40327,60 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB221_2 +; RV32I-ZALRSC-NEXT: .LBB221_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 2 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB221_7 +; RV32I-ZALRSC-NEXT: .LBB221_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB221_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB221_5 +; RV32I-ZALRSC-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB221_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB221_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB221_1 +; RV32I-ZALRSC-NEXT: .LBB221_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte 
Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -32714,6 +40471,21 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB221_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB221_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB221_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB221_3: # in Loop: Header=BB221_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB221_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.d.aq a0, a1, (a0) @@ -32782,6 +40554,60 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB222_2 +; RV32I-ZALRSC-NEXT: .LBB222_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 3 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB222_7 +; RV32I-ZALRSC-NEXT: .LBB222_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB222_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB222_5 +; RV32I-ZALRSC-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB222_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB222_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB222_1 +; RV32I-ZALRSC-NEXT: .LBB222_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 
@@ -32872,6 +40698,21 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB222_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB222_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB222_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB222_3: # in Loop: Header=BB222_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB222_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.d.rl a0, a1, (a0) @@ -32940,6 +40781,60 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB223_2 +; RV32I-ZALRSC-NEXT: .LBB223_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 4 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB223_7 +; RV32I-ZALRSC-NEXT: .LBB223_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB223_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB223_5 +; RV32I-ZALRSC-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB223_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB223_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB223_1 +; RV32I-ZALRSC-NEXT: .LBB223_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33030,6 +40925,21 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: 
atomicrmw_max_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB223_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB223_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB223_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB223_3: # in Loop: Header=BB223_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB223_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.d.aqrl a0, a1, (a0) @@ -33098,6 +41008,60 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB224_2 +; RV32I-ZALRSC-NEXT: .LBB224_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 5 +; RV32I-ZALRSC-NEXT: li a5, 5 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB224_7 +; RV32I-ZALRSC-NEXT: .LBB224_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB224_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB224_5 +; RV32I-ZALRSC-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB224_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB224_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB224_1 +; RV32I-ZALRSC-NEXT: .LBB224_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_max_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33188,6 +41152,21 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB224_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; 
RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB224_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB224_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB224_3: # in Loop: Header=BB224_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB224_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_max_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomax.d.aqrl a0, a1, (a0) @@ -33256,6 +41235,60 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB225_2 +; RV32I-ZALRSC-NEXT: .LBB225_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB225_7 +; RV32I-ZALRSC-NEXT: .LBB225_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB225_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB225_5 +; RV32I-ZALRSC-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB225_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB225_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB225_1 +; RV32I-ZALRSC-NEXT: .LBB225_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33346,6 +41379,21 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB225_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB225_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB225_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv 
a3, a1 +; RV64I-ZALRSC-NEXT: .LBB225_3: # in Loop: Header=BB225_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB225_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_min_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.d a0, a1, (a0) @@ -33409,6 +41457,60 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB226_2 +; RV32I-ZALRSC-NEXT: .LBB226_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 2 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB226_7 +; RV32I-ZALRSC-NEXT: .LBB226_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB226_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB226_5 +; RV32I-ZALRSC-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB226_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB226_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB226_1 +; RV32I-ZALRSC-NEXT: .LBB226_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33499,6 +41601,21 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB226_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB226_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB226_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB226_3: # in Loop: Header=BB226_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB226_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; 
RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.d.aq a0, a1, (a0) @@ -33567,6 +41684,60 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB227_2 +; RV32I-ZALRSC-NEXT: .LBB227_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 3 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB227_7 +; RV32I-ZALRSC-NEXT: .LBB227_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB227_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB227_5 +; RV32I-ZALRSC-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB227_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB227_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB227_1 +; RV32I-ZALRSC-NEXT: .LBB227_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33657,6 +41828,21 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB227_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB227_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB227_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB227_3: # in Loop: Header=BB227_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB227_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.d.rl a0, a1, (a0) @@ 
-33725,6 +41911,60 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB228_2 +; RV32I-ZALRSC-NEXT: .LBB228_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 4 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB228_7 +; RV32I-ZALRSC-NEXT: .LBB228_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB228_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB228_5 +; RV32I-ZALRSC-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB228_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB228_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB228_1 +; RV32I-ZALRSC-NEXT: .LBB228_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33815,6 +42055,21 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB228_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB228_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB228_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB228_3: # in Loop: Header=BB228_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB228_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.d.aqrl a0, a1, (a0) @@ -33883,6 +42138,60 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_min_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB229_2 +; RV32I-ZALRSC-NEXT: .LBB229_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 5 +; RV32I-ZALRSC-NEXT: li a5, 5 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB229_7 +; RV32I-ZALRSC-NEXT: .LBB229_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB229_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB229_5 +; RV32I-ZALRSC-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB229_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB229_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB229_1 +; RV32I-ZALRSC-NEXT: .LBB229_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_min_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -33973,6 +42282,21 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB229_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB229_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB229_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB229_3: # in Loop: Header=BB229_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB229_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_min_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomin.d.aqrl a0, a1, (a0) @@ -34041,6 +42365,60 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; 
RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB230_2 +; RV32I-ZALRSC-NEXT: .LBB230_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB230_7 +; RV32I-ZALRSC-NEXT: .LBB230_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB230_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB230_5 +; RV32I-ZALRSC-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB230_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB230_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB230_1 +; RV32I-ZALRSC-NEXT: .LBB230_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34131,6 +42509,21 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB230_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB230_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB230_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB230_3: # in Loop: Header=BB230_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB230_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umax_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.d a0, a1, (a0) @@ -34194,6 +42587,60 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; 
RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB231_2 +; RV32I-ZALRSC-NEXT: .LBB231_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 2 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB231_7 +; RV32I-ZALRSC-NEXT: .LBB231_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB231_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB231_5 +; RV32I-ZALRSC-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB231_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB231_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB231_1 +; RV32I-ZALRSC-NEXT: .LBB231_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34284,6 +42731,21 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB231_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB231_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB231_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB231_3: # in Loop: Header=BB231_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB231_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.aq a0, a1, (a0) @@ -34352,6 +42814,60 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; 
RV32I-ZALRSC-NEXT: j .LBB232_2 +; RV32I-ZALRSC-NEXT: .LBB232_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 3 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB232_7 +; RV32I-ZALRSC-NEXT: .LBB232_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB232_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB232_5 +; RV32I-ZALRSC-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB232_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB232_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB232_1 +; RV32I-ZALRSC-NEXT: .LBB232_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34442,6 +42958,21 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB232_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB232_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB232_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB232_3: # in Loop: Header=BB232_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB232_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.rl a0, a1, (a0) @@ -34510,6 +43041,60 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB233_2 +; RV32I-ZALRSC-NEXT: .LBB233_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw 
a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 4 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB233_7 +; RV32I-ZALRSC-NEXT: .LBB233_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB233_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB233_5 +; RV32I-ZALRSC-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB233_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB233_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB233_1 +; RV32I-ZALRSC-NEXT: .LBB233_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34600,6 +43185,21 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB233_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB233_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB233_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB233_3: # in Loop: Header=BB233_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB233_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.aqrl a0, a1, (a0) @@ -34668,6 +43268,60 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB234_2 +; RV32I-ZALRSC-NEXT: .LBB234_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 5 +; RV32I-ZALRSC-NEXT: li a5, 5 +; 
RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB234_7 +; RV32I-ZALRSC-NEXT: .LBB234_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB234_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB234_5 +; RV32I-ZALRSC-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB234_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB234_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB234_1 +; RV32I-ZALRSC-NEXT: .LBB234_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umax_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34758,6 +43412,21 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB234_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB234_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB234_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB234_3: # in Loop: Header=BB234_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB234_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umax_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amomaxu.d.aqrl a0, a1, (a0) @@ -34826,6 +43495,60 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB235_2 +; RV32I-ZALRSC-NEXT: .LBB235_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) 
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB235_7 +; RV32I-ZALRSC-NEXT: .LBB235_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB235_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB235_5 +; RV32I-ZALRSC-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB235_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB235_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB235_1 +; RV32I-ZALRSC-NEXT: .LBB235_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -34916,6 +43639,21 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB235_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB235_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB235_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB235_3: # in Loop: Header=BB235_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB235_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-LABEL: atomicrmw_umin_i64_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.d a0, a1, (a0) @@ -34979,6 +43717,60 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_acquire: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB236_2 +; RV32I-ZALRSC-NEXT: .LBB236_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 2 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB236_7 +; RV32I-ZALRSC-NEXT: .LBB236_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq 
a5, s0, .LBB236_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB236_5 +; RV32I-ZALRSC-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB236_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB236_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB236_1 +; RV32I-ZALRSC-NEXT: .LBB236_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35069,6 +43861,21 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_acquire: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB236_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB236_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB236_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB236_3: # in Loop: Header=BB236_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB236_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.aq a0, a1, (a0) @@ -35137,6 +43944,60 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_release: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB237_2 +; RV32I-ZALRSC-NEXT: .LBB237_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 3 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB237_7 +; RV32I-ZALRSC-NEXT: .LBB237_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB237_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; 
RV32I-ZALRSC-NEXT: j .LBB237_5 +; RV32I-ZALRSC-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB237_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB237_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB237_1 +; RV32I-ZALRSC-NEXT: .LBB237_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_release: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35227,6 +44088,21 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_release: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB237_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB237_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB237_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB237_3: # in Loop: Header=BB237_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB237_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_release: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.rl a0, a1, (a0) @@ -35295,6 +44171,60 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_acq_rel: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB238_2 +; RV32I-ZALRSC-NEXT: .LBB238_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 4 +; RV32I-ZALRSC-NEXT: li a5, 2 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB238_7 +; RV32I-ZALRSC-NEXT: .LBB238_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB238_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB238_5 +; RV32I-ZALRSC-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: 
.LBB238_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB238_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB238_1 +; RV32I-ZALRSC-NEXT: .LBB238_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_acq_rel: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35385,6 +44315,21 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_acq_rel: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB238_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB238_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB238_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB238_3: # in Loop: Header=BB238_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB238_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acq_rel: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.aqrl a0, a1, (a0) @@ -35453,6 +44398,60 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_seq_cst: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB239_2 +; RV32I-ZALRSC-NEXT: .LBB239_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: li a4, 5 +; RV32I-ZALRSC-NEXT: li a5, 5 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB239_7 +; RV32I-ZALRSC-NEXT: .LBB239_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB239_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB239_5 +; RV32I-ZALRSC-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB239_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; 
RV32I-ZALRSC-NEXT: beqz a0, .LBB239_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB239_1 +; RV32I-ZALRSC-NEXT: .LBB239_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV32IA-LABEL: atomicrmw_umin_i64_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -32 @@ -35543,6 +44542,21 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_seq_cst: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB239_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB239_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB239_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB239_3: # in Loop: Header=BB239_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB239_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret +; ; RV64IA-WMO-LABEL: atomicrmw_umin_i64_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: amominu.d.aqrl a0, a1, (a0) diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index 7d29ac9..7fe5fa7 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -5,12 +5,16 @@ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s +; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s +; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV32I-LABEL: atomic_load_i8_unordered: @@ -30,6 +34,11 @@ define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV32IA-NEXT: lb a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lb a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomic_load_i8_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -46,6 +55,11 @@ define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomic_load_i8_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lb a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret %1 = load atomic i8, ptr %a unordered, align 1 ret i8 %1 } @@ -68,6 +82,11 @@ define signext i16 @atomic_load_i16_unordered(ptr %a) nounwind { 
; RV32IA-NEXT: lh a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lh a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomic_load_i16_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -84,6 +103,11 @@ define signext i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomic_load_i16_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lh a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret %1 = load atomic i16, ptr %a unordered, align 2 ret i16 %1 } @@ -104,6 +128,11 @@ define signext i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: lw a0, 0(a0) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomic_load_i32_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -119,6 +148,11 @@ define signext i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomic_load_i32_unordered: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: lw a0, 0(a0) +; RV64I-ZALRSC-NEXT: ret %1 = load atomic i32, ptr %a unordered, align 4 ret i32 %1 } @@ -159,6 +193,28 @@ define signext i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -192,6 +248,28 @@ define signext i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB3_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i8 %b monotonic ret i8 %1 } @@ -231,6 +309,28 @@ define signext i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret 
; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -264,6 +364,28 @@ define signext i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB4_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i8 %b monotonic ret i8 %1 } @@ -303,6 +425,28 @@ define signext i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB5_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -336,6 +480,28 @@ define signext i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; 
RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB5_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i8 %b monotonic ret i8 %1 } @@ -369,6 +535,27 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -396,6 +583,27 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i8 %b monotonic ret i8 %1 } @@ -436,6 +644,29 @@ define signext i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB7_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -470,6 +701,29 @@ define signext i8 
@atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB7_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i8 %b monotonic ret i8 %1 } @@ -499,6 +753,23 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -522,6 +793,23 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i8 %b monotonic ret i8 %1 } @@ -551,6 +839,23 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i8_monotonic: ; RV64I: # %bb.0: ; 
RV64I-NEXT: addi sp, sp, -16 @@ -574,6 +879,23 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i8 %b monotonic ret i8 %1 } @@ -653,6 +975,37 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB10_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB10_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -726,6 +1079,37 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB10_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB10_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; 
RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i8 %b monotonic ret i8 %1 } @@ -805,6 +1189,37 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: srai a1, a1, 24 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: xori a4, a4, 24 +; RV32I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB11_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB11_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -878,6 +1293,37 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: srai a1, a1, 56 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: xori a4, a4, 56 +; RV64I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB11_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB11_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i8 %b monotonic ret i8 %1 } @@ -950,6 +1396,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: 
Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB12_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB12_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1016,6 +1488,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB12_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB12_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i8 %b monotonic ret i8 %1 } @@ -1088,6 +1586,32 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a6, a4, a3 +; RV32I-ZALRSC-NEXT: mv a5, a4 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB13_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a3 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB13_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1154,6 +1678,32 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli 
a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a6, a4, a3 +; RV64I-ZALRSC-NEXT: mv a5, a4 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB13_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a3 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB13_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i8 %b monotonic ret i8 %1 } @@ -1194,6 +1744,29 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: mv a5, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB14_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1228,6 +1801,29 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: mv a5, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB14_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i16 %b monotonic ret i16 %1 } @@ -1268,6 +1864,29 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, 
a1, a0 +; RV32I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: add a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1302,6 +1921,29 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: add a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB15_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i16 %b monotonic ret i16 %1 } @@ -1342,6 +1984,29 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: sub a5, a3, a1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB16_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1376,6 +2041,29 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: sub a5, a3, a1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; 
RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB16_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i16 %b monotonic ret i16 %1 } @@ -1410,6 +2098,28 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: not a3, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB17_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1438,6 +2148,28 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: not a3, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB17_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i16 %b monotonic ret i16 %1 } @@ -1479,6 +2211,30 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: not a5, a5 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1514,6 +2270,30 @@ define signext i16 
@atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: not a5, a5 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB18_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i16 %b monotonic ret i16 %1 } @@ -1544,6 +2324,24 @@ define signext i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB19_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1568,6 +2366,24 @@ define signext i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB19_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i16 %b monotonic ret i16 %1 } @@ -1598,6 +2414,24 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srli a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: xor a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB20_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; 
RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1622,6 +2456,24 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srli a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: xor a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB20_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i16 %b monotonic ret i16 %1 } @@ -1703,6 +2555,39 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB21_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB21_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1778,6 +2663,39 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; 
RV64I-ZALRSC-NEXT: bge a7, a1, .LBB21_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB21_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i16 %b monotonic ret i16 %1 } @@ -1859,6 +2777,39 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a4, 16 +; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a2) +; RV32I-ZALRSC-NEXT: and a7, a5, a3 +; RV32I-ZALRSC-NEXT: mv a6, a5 +; RV32I-ZALRSC-NEXT: sll a7, a7, a4 +; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB22_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: and a6, a6, a3 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB22_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -1934,6 +2885,39 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a4, 48 +; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a2) +; RV64I-ZALRSC-NEXT: and a7, a5, a3 +; RV64I-ZALRSC-NEXT: mv a6, a5 +; RV64I-ZALRSC-NEXT: sll a7, a7, a4 +; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB22_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: and a6, a6, a3 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB22_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = 
atomicrmw min ptr %a, i16 %b monotonic ret i16 %1 } @@ -2011,6 +2995,33 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB23_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2082,6 +3093,33 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB23_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB23_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i16 %b monotonic ret i16 %1 } @@ -2159,6 +3197,33 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: sll a4, a3, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: and a6, a3, a4 +; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a3, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: .LBB24_3: # in Loop: 
Header=BB24_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2230,6 +3295,33 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: and a6, a3, a4 +; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a3, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB24_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i16 %b monotonic ret i16 %1 } @@ -2250,6 +3342,17 @@ define signext i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoswap.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB25_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2265,6 +3368,17 @@ define signext i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB25_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i32 %b monotonic ret i32 %1 } @@ -2285,6 +3399,17 @@ define signext i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoadd.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: add a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB26_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 
@@ -2300,6 +3425,17 @@ define signext i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB26_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i32 %b monotonic ret i32 %1 } @@ -2321,6 +3457,17 @@ define signext i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoadd.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: sub a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB27_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2337,6 +3484,17 @@ define signext i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA-NEXT: neg a1, a1 ; RV64IA-NEXT: amoadd.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB27_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i32 %b monotonic ret i32 %1 } @@ -2357,6 +3515,17 @@ define signext i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoand.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB28_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2372,6 +3541,17 @@ define signext i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB28_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i32 %b monotonic ret i32 %1 } @@ -2413,6 +3593,18 @@ define signext i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a3, a2, a1 +; 
RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB29_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2449,6 +3641,18 @@ define signext i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA-ZACAS-NEXT: bne a0, a3, .LBB29_1 ; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB29_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i32 %b monotonic ret i32 %1 } @@ -2469,6 +3673,17 @@ define signext i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoor.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: or a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB30_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2484,6 +3699,17 @@ define signext i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB30_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i32 %b monotonic ret i32 %1 } @@ -2504,6 +3730,17 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amoxor.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: xor a3, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB31_1 +; RV32I-ZALRSC-NEXT: # %bb.2: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2519,6 +3756,17 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB31_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i32 %b monotonic ret i32 %1 } @@ -2565,6 +3813,21 @@ define signext i32 
@atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amomax.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB32_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB32_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2608,6 +3871,22 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB32_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB32_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i32 %b monotonic ret i32 %1 } @@ -2654,6 +3933,21 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amomin.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB33_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB33_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2697,6 +3991,22 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB33_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB33_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i32 %b monotonic ret i32 %1 } @@ -2743,6 +4053,21 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amomaxu.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: 
atomicrmw_umax_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB34_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB34_3: # in Loop: Header=BB34_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB34_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2786,6 +4111,22 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB34_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB34_3: # in Loop: Header=BB34_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB34_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i32 %b monotonic ret i32 %1 } @@ -2832,6 +4173,21 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32IA-NEXT: amominu.w a0, a1, (a0) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB35_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a1 +; RV32I-ZALRSC-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB35_1 +; RV32I-ZALRSC-NEXT: # %bb.4: +; RV32I-ZALRSC-NEXT: mv a0, a2 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -48 @@ -2875,6 +4231,22 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.w a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: sext.w a2, a1 +; RV64I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB35_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB35_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i32 %b monotonic ret i32 %1 } @@ -2900,6 +4272,16 @@ define signext i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded 
Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_exchange_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2914,6 +4296,17 @@ define signext i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoswap.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB36_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xchg ptr %a, i64 %b monotonic ret i64 %1 } @@ -2939,6 +4332,16 @@ define signext i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2953,6 +4356,17 @@ define signext i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoadd.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: add a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB37_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw add ptr %a, i64 %b monotonic ret i64 %1 } @@ -2978,6 +4392,16 @@ define signext i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -2993,6 +4417,17 @@ define signext i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA-NEXT: neg a1, a1 ; RV64IA-NEXT: amoadd.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB38_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw sub ptr %a, i64 %b monotonic ret i64 %1 } @@ -3018,6 +4453,16 @@ define signext i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; 
RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3032,6 +4477,17 @@ define signext i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoand.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB39_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw and ptr %a, i64 %b monotonic ret i64 %1 } @@ -3057,6 +4513,16 @@ define signext i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3092,6 +4558,18 @@ define signext i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA-ZACAS-NEXT: bne a0, a3, .LBB40_1 ; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: and a3, a2, a1 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB40_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw nand ptr %a, i64 %b monotonic ret i64 %1 } @@ -3117,6 +4595,16 @@ define signext i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3131,6 +4619,17 @@ define signext i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoor.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: or a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB41_1 +; RV64I-ZALRSC-NEXT: # 
%bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw or ptr %a, i64 %b monotonic ret i64 %1 } @@ -3156,6 +4655,16 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -16 +; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: li a3, 0 +; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8 +; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3170,6 +4679,17 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amoxor.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a2, a1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB42_1 +; RV64I-ZALRSC-NEXT: # %bb.2: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw xor ptr %a, i64 %b monotonic ret i64 %1 } @@ -3283,6 +4803,60 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB43_2 +; RV32I-ZALRSC-NEXT: .LBB43_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB43_7 +; RV32I-ZALRSC-NEXT: .LBB43_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB43_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB43_5 +; RV32I-ZALRSC-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB43_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB43_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB43_1 +; RV32I-ZALRSC-NEXT: .LBB43_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; 
RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3323,6 +4897,21 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomax.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB43_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB43_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw max ptr %a, i64 %b monotonic ret i64 %1 } @@ -3436,6 +5025,60 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB44_2 +; RV32I-ZALRSC-NEXT: .LBB44_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB44_7 +; RV32I-ZALRSC-NEXT: .LBB44_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB44_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: slt a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB44_5 +; RV32I-ZALRSC-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB44_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB44_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB44_1 +; RV32I-ZALRSC-NEXT: .LBB44_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 
+; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3476,6 +5119,21 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomin.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB44_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB44_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw min ptr %a, i64 %b monotonic ret i64 %1 } @@ -3589,6 +5247,60 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB45_2 +; RV32I-ZALRSC-NEXT: .LBB45_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB45_7 +; RV32I-ZALRSC-NEXT: .LBB45_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB45_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB45_5 +; RV32I-ZALRSC-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB45_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: bnez a0, .LBB45_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB45_1 +; RV32I-ZALRSC-NEXT: .LBB45_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3629,6 +5341,21 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 
%b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amomaxu.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB45_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB45_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umax ptr %a, i64 %b monotonic ret i64 %1 } @@ -3742,6 +5469,60 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: addi sp, sp, 32 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: addi sp, sp, -32 +; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-ZALRSC-NEXT: mv s0, a2 +; RV32I-ZALRSC-NEXT: mv s1, a0 +; RV32I-ZALRSC-NEXT: lw a4, 0(a0) +; RV32I-ZALRSC-NEXT: lw a5, 4(a0) +; RV32I-ZALRSC-NEXT: mv s2, a1 +; RV32I-ZALRSC-NEXT: j .LBB46_2 +; RV32I-ZALRSC-NEXT: .LBB46_1: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: sw a4, 8(sp) +; RV32I-ZALRSC-NEXT: sw a5, 12(sp) +; RV32I-ZALRSC-NEXT: addi a1, sp, 8 +; RV32I-ZALRSC-NEXT: mv a0, s1 +; RV32I-ZALRSC-NEXT: li a4, 0 +; RV32I-ZALRSC-NEXT: li a5, 0 +; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8 +; RV32I-ZALRSC-NEXT: lw a4, 8(sp) +; RV32I-ZALRSC-NEXT: lw a5, 12(sp) +; RV32I-ZALRSC-NEXT: bnez a0, .LBB46_7 +; RV32I-ZALRSC-NEXT: .LBB46_2: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB46_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s0, a5 +; RV32I-ZALRSC-NEXT: j .LBB46_5 +; RV32I-ZALRSC-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: sltu a0, s2, a4 +; RV32I-ZALRSC-NEXT: .LBB46_5: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, a4 +; RV32I-ZALRSC-NEXT: mv a3, a5 +; RV32I-ZALRSC-NEXT: beqz a0, .LBB46_1 +; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-ZALRSC-NEXT: mv a2, s2 +; RV32I-ZALRSC-NEXT: mv a3, s0 +; RV32I-ZALRSC-NEXT: j .LBB46_1 +; RV32I-ZALRSC-NEXT: .LBB46_7: # %atomicrmw.end +; RV32I-ZALRSC-NEXT: mv a0, a4 +; RV32I-ZALRSC-NEXT: mv a1, a5 +; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-ZALRSC-NEXT: addi sp, sp, 32 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -3782,6 +5563,21 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: amominu.d a0, a1, (a0) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: 
.LBB46_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.d a2, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB46_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB46_1 +; RV64I-ZALRSC-NEXT: # %bb.4: +; RV64I-ZALRSC-NEXT: mv a0, a2 +; RV64I-ZALRSC-NEXT: ret %1 = atomicrmw umin ptr %a, i64 %b monotonic ret i64 %1 } @@ -3827,6 +5623,32 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a4, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: zext.b a2, a2 +; RV32I-ZALRSC-NEXT: sll a4, a4, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a5, (a3) +; RV32I-ZALRSC-NEXT: and a6, a5, a4 +; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB47_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a5, a2 +; RV32I-ZALRSC-NEXT: and a6, a6, a4 +; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV32I-ZALRSC-NEXT: .LBB47_3: +; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 24 +; RV32I-ZALRSC-NEXT: srai a0, a0, 24 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic_val0: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3866,6 +5688,32 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a4, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: zext.b a2, a2 +; RV64I-ZALRSC-NEXT: sllw a4, a4, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a5, (a3) +; RV64I-ZALRSC-NEXT: and a6, a5, a4 +; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB47_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a5, a2 +; RV64I-ZALRSC-NEXT: and a6, a6, a4 +; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB47_1 +; RV64I-ZALRSC-NEXT: .LBB47_3: +; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 56 +; RV64I-ZALRSC-NEXT: srai a0, a0, 56 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic %2 = extractvalue { i8, i1 } %1, 0 ret i8 %2 @@ -3911,6 +5759,32 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig ; RV32IA-NEXT: seqz a0, a1 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: li a4, 255 +; RV32I-ZALRSC-NEXT: zext.b a1, a1 +; RV32I-ZALRSC-NEXT: zext.b a2, a2 +; RV32I-ZALRSC-NEXT: sll a4, a4, a0 +; RV32I-ZALRSC-NEXT: 
sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a0, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a3) +; RV32I-ZALRSC-NEXT: and a5, a2, a4 +; RV32I-ZALRSC-NEXT: bne a5, a1, .LBB48_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a5, a2, a0 +; RV32I-ZALRSC-NEXT: and a5, a5, a4 +; RV32I-ZALRSC-NEXT: xor a5, a2, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) +; RV32I-ZALRSC-NEXT: bnez a5, .LBB48_1 +; RV32I-ZALRSC-NEXT: .LBB48_3: +; RV32I-ZALRSC-NEXT: and a2, a2, a4 +; RV32I-ZALRSC-NEXT: xor a1, a1, a2 +; RV32I-ZALRSC-NEXT: seqz a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic_val1: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -3949,6 +5823,32 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig ; RV64IA-NEXT: xor a1, a1, a2 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: li a4, 255 +; RV64I-ZALRSC-NEXT: zext.b a1, a1 +; RV64I-ZALRSC-NEXT: zext.b a2, a2 +; RV64I-ZALRSC-NEXT: sllw a4, a4, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a0, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a3) +; RV64I-ZALRSC-NEXT: and a5, a2, a4 +; RV64I-ZALRSC-NEXT: bne a5, a1, .LBB48_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a5, a2, a0 +; RV64I-ZALRSC-NEXT: and a5, a5, a4 +; RV64I-ZALRSC-NEXT: xor a5, a2, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) +; RV64I-ZALRSC-NEXT: bnez a5, .LBB48_1 +; RV64I-ZALRSC-NEXT: .LBB48_3: +; RV64I-ZALRSC-NEXT: and a2, a2, a4 +; RV64I-ZALRSC-NEXT: xor a1, a1, a2 +; RV64I-ZALRSC-NEXT: seqz a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic %2 = extractvalue { i8, i1 } %1, 1 ret i1 %2 @@ -3996,6 +5896,33 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext % ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val0: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a4, 16 +; RV32I-ZALRSC-NEXT: addi a4, a4, -1 +; RV32I-ZALRSC-NEXT: sll a5, a4, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a4 +; RV32I-ZALRSC-NEXT: and a2, a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a5 +; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB49_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a2 +; RV32I-ZALRSC-NEXT: and a6, a6, a5 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV32I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV32I-ZALRSC-NEXT: .LBB49_3: +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: slli a0, a0, 16 +; RV32I-ZALRSC-NEXT: srai a0, a0, 16 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic_val0: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4036,6 +5963,33 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext % ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: 
cmpxchg_i16_monotonic_monotonic_val0: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a4, 16 +; RV64I-ZALRSC-NEXT: addi a4, a4, -1 +; RV64I-ZALRSC-NEXT: sllw a5, a4, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a4 +; RV64I-ZALRSC-NEXT: and a2, a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a5 +; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB49_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a2 +; RV64I-ZALRSC-NEXT: and a6, a6, a5 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV64I-ZALRSC-NEXT: bnez a6, .LBB49_1 +; RV64I-ZALRSC-NEXT: .LBB49_3: +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: slli a0, a0, 48 +; RV64I-ZALRSC-NEXT: srai a0, a0, 48 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic %2 = extractvalue { i16, i1 } %1, 0 ret i16 %2 @@ -4082,6 +6036,33 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16 ; RV32IA-NEXT: seqz a0, a1 ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: lui a4, 16 +; RV32I-ZALRSC-NEXT: addi a4, a4, -1 +; RV32I-ZALRSC-NEXT: sll a5, a4, a0 +; RV32I-ZALRSC-NEXT: and a1, a1, a4 +; RV32I-ZALRSC-NEXT: and a2, a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a0, a2, a0 +; RV32I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a2, (a3) +; RV32I-ZALRSC-NEXT: and a4, a2, a5 +; RV32I-ZALRSC-NEXT: bne a4, a1, .LBB50_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV32I-ZALRSC-NEXT: xor a4, a2, a0 +; RV32I-ZALRSC-NEXT: and a4, a4, a5 +; RV32I-ZALRSC-NEXT: xor a4, a2, a4 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a3) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB50_1 +; RV32I-ZALRSC-NEXT: .LBB50_3: +; RV32I-ZALRSC-NEXT: and a2, a2, a5 +; RV32I-ZALRSC-NEXT: xor a1, a1, a2 +; RV32I-ZALRSC-NEXT: seqz a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic_val1: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4121,6 +6102,33 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16 ; RV64IA-NEXT: xor a1, a1, a2 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: lui a4, 16 +; RV64I-ZALRSC-NEXT: addi a4, a4, -1 +; RV64I-ZALRSC-NEXT: sllw a5, a4, a0 +; RV64I-ZALRSC-NEXT: and a1, a1, a4 +; RV64I-ZALRSC-NEXT: and a2, a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a0, a2, a0 +; RV64I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a2, (a3) +; RV64I-ZALRSC-NEXT: and a4, a2, a5 +; RV64I-ZALRSC-NEXT: bne a4, a1, .LBB50_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 +; RV64I-ZALRSC-NEXT: xor a4, a2, a0 +; RV64I-ZALRSC-NEXT: and a4, a4, a5 +; RV64I-ZALRSC-NEXT: xor a4, a2, a4 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a3) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB50_1 +; RV64I-ZALRSC-NEXT: .LBB50_3: +; RV64I-ZALRSC-NEXT: and a2, a2, a5 +; RV64I-ZALRSC-NEXT: xor a1, a1, a2 +; 
RV64I-ZALRSC-NEXT: seqz a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic %2 = extractvalue { i16, i1 } %1, 1 ret i1 %2 @@ -4159,6 +6167,18 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext % ; RV32IA-ZACAS-NEXT: mv a0, a1 ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val0: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: bne a3, a1, .LBB51_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB51_1 +; RV32I-ZALRSC-NEXT: .LBB51_3: +; RV32I-ZALRSC-NEXT: mv a0, a3 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val0: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4190,6 +6210,18 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext % ; RV64IA-ZACAS-NEXT: amocas.w a1, a2, (a0) ; RV64IA-ZACAS-NEXT: mv a0, a1 ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val0: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB51_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB51_1 +; RV64I-ZALRSC-NEXT: .LBB51_3: +; RV64I-ZALRSC-NEXT: mv a0, a3 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic %2 = extractvalue { i32, i1 } %1, 0 ret i32 %2 @@ -4230,6 +6262,19 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32 ; RV32IA-ZACAS-NEXT: seqz a0, a1 ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val1: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: bne a3, a1, .LBB52_3 +; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV32I-ZALRSC-NEXT: bnez a4, .LBB52_1 +; RV32I-ZALRSC-NEXT: .LBB52_3: +; RV32I-ZALRSC-NEXT: xor a1, a3, a1 +; RV32I-ZALRSC-NEXT: seqz a0, a1 +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val1: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -4263,6 +6308,19 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32 ; RV64IA-ZACAS-NEXT: xor a1, a3, a1 ; RV64IA-ZACAS-NEXT: seqz a0, a1 ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val1: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB52_3 +; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a4, a2, (a0) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB52_1 +; RV64I-ZALRSC-NEXT: .LBB52_3: +; RV64I-ZALRSC-NEXT: xor a1, a3, a1 +; RV64I-ZALRSC-NEXT: seqz a0, a1 +; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic %2 = extractvalue { i32, i1 } %1, 1 ret i1 %2 @@ -4304,6 +6362,27 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; 
RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB53_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB53_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB53_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB53_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4339,6 +6418,28 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: li a2, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB53_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB53_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB53_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB53_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4391,6 +6492,27 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB54_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB54_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: add a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB54_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB54_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: addi a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_add_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4426,6 +6548,28 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: addi a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB54_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB54_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: add a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB54_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB54_2: # %else +; 
RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: addi a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4479,6 +6623,27 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB55_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB55_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: sub a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB55_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB55_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: addi a2, a0, -1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_sub_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4514,6 +6679,28 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: addi a2, a0, -1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB55_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB55_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: sub a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB55_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB55_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: addi a2, a1, -1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4567,6 +6754,27 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB56_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB56_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: and a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB56_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB56_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: andi a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_and_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4602,6 +6810,28 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: andi a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; 
RV64I-ZALRSC-NEXT: beqz a1, .LBB56_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB56_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: and a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB56_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB56_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4685,6 +6915,28 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-ZACAS-NEXT: mv a0, a1 ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB57_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB57_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: and a3, a0, a2 +; RV32I-ZALRSC-NEXT: not a3, a3 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB57_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB57_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: andi a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_nand_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4750,6 +7002,28 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-ZACAS-NEXT: sw a2, 0(a0) ; RV64IA-ZACAS-NEXT: mv a0, a1 ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB57_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB57_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: and a3, a0, a2 +; RV64I-ZALRSC-NEXT: not a3, a3 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB57_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB57_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: andi a2, a0, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4803,6 +7077,27 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind { ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB58_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB58_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: or a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB58_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB58_2: # 
%else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: ori a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_or_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4838,6 +7133,28 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind { ; RV64IA-NEXT: ori a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB58_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB58_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: or a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB58_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB58_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: ori a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -4891,6 +7208,27 @@ define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB59_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB59_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: xor a3, a0, a2 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB59_3 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB59_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: xori a2, a0, 1 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_xor_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a1, 1 @@ -4926,6 +7264,28 @@ define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: xori a2, a0, 1 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB59_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB59_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: xor a3, a1, a2 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB59_3 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB59_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: xori a2, a1, 1 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5007,6 +7367,37 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 
+; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB60_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB60_5: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bge a3, a2, .LBB60_7 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB60_7: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB60_5 +; RV32I-ZALRSC-NEXT: # %bb.8: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB60_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: mv a2, a0 +; RV32I-ZALRSC-NEXT: bgtz a0, .LBB60_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %else +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB60_4: # %else +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_max_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5070,6 +7461,37 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: .LBB60_4: # %else ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB60_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB60_5: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: mv a3, a0 +; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB60_7 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB60_7: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB60_5 +; RV64I-ZALRSC-NEXT: # %bb.8: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB60_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: mv a2, a0 +; RV64I-ZALRSC-NEXT: bgtz a0, .LBB60_4 +; RV64I-ZALRSC-NEXT: # %bb.3: # %else +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB60_4: # %else +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5155,6 +7577,37 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB61_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB61_5: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bge a2, a3, .LBB61_7 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB61_7: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB61_5 +; RV32I-ZALRSC-NEXT: # %bb.8: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB61_2: # 
%else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: mv a2, a0 +; RV32I-ZALRSC-NEXT: blez a0, .LBB61_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %else +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB61_4: # %else +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_min_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5220,6 +7673,37 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: .LBB61_4: # %else ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB61_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB61_5: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: mv a3, a0 +; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB61_7 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB61_7: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB61_5 +; RV64I-ZALRSC-NEXT: # %bb.8: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB61_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: mv a2, a0 +; RV64I-ZALRSC-NEXT: blez a0, .LBB61_4 +; RV64I-ZALRSC-NEXT: # %bb.3: # %else +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB61_4: # %else +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5290,6 +7774,34 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB62_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB62_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bgeu a3, a2, .LBB62_5 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB62_5: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB62_3 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB62_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: seqz a2, a0 +; RV32I-ZALRSC-NEXT: add a2, a0, a2 +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umax_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5347,6 +7859,35 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: add a2, a0, a2 ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a1, a1, 1 +; RV64I-ZALRSC-NEXT: beqz a1, .LBB62_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB62_3: # %then +; 
RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a1, (a0) +; RV64I-ZALRSC-NEXT: mv a3, a1 +; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB62_5 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB62_5: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB62_3 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB62_2: # %else +; RV64I-ZALRSC-NEXT: lw a1, 0(a0) +; RV64I-ZALRSC-NEXT: seqz a2, a1 +; RV64I-ZALRSC-NEXT: add a2, a1, a2 +; RV64I-ZALRSC-NEXT: sw a2, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a1 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5434,6 +7975,38 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: sw a2, 0(a1) ; RV32IA-NEXT: ret ; +; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: andi a2, a1, 1 +; RV32I-ZALRSC-NEXT: mv a1, a0 +; RV32I-ZALRSC-NEXT: beqz a2, .LBB63_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB63_5: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w a0, (a1) +; RV32I-ZALRSC-NEXT: mv a3, a0 +; RV32I-ZALRSC-NEXT: bgeu a2, a3, .LBB63_7 +; RV32I-ZALRSC-NEXT: # %bb.6: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV32I-ZALRSC-NEXT: mv a3, a2 +; RV32I-ZALRSC-NEXT: .LBB63_7: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB63_5 +; RV32I-ZALRSC-NEXT: # %bb.8: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB63_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a1) +; RV32I-ZALRSC-NEXT: li a3, 1 +; RV32I-ZALRSC-NEXT: mv a2, a0 +; RV32I-ZALRSC-NEXT: bltu a0, a3, .LBB63_4 +; RV32I-ZALRSC-NEXT: # %bb.3: # %else +; RV32I-ZALRSC-NEXT: li a2, 1 +; RV32I-ZALRSC-NEXT: .LBB63_4: # %else +; RV32I-ZALRSC-NEXT: sw a2, 0(a1) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: atomicrmw_umin_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -5501,6 +8074,38 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: .LBB63_4: # %else ; RV64IA-NEXT: sw a2, 0(a1) ; RV64IA-NEXT: ret +; +; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: andi a2, a1, 1 +; RV64I-ZALRSC-NEXT: mv a1, a0 +; RV64I-ZALRSC-NEXT: beqz a2, .LBB63_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB63_5: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w a0, (a1) +; RV64I-ZALRSC-NEXT: mv a3, a0 +; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB63_7 +; RV64I-ZALRSC-NEXT: # %bb.6: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV64I-ZALRSC-NEXT: mv a3, a2 +; RV64I-ZALRSC-NEXT: .LBB63_7: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1) +; RV64I-ZALRSC-NEXT: bnez a3, .LBB63_5 +; RV64I-ZALRSC-NEXT: # %bb.8: # %then +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB63_2: # %else +; RV64I-ZALRSC-NEXT: lw a0, 0(a1) +; RV64I-ZALRSC-NEXT: li a3, 1 +; RV64I-ZALRSC-NEXT: mv a2, a0 +; RV64I-ZALRSC-NEXT: bltu a0, a3, .LBB63_4 +; RV64I-ZALRSC-NEXT: # %bb.3: # 
%else +; RV64I-ZALRSC-NEXT: li a2, 1 +; RV64I-ZALRSC-NEXT: .LBB63_4: # %else +; RV64I-ZALRSC-NEXT: sw a2, 0(a1) +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: @@ -5570,6 +8175,25 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3 ; RV32IA-ZACAS-NEXT: lw a0, 0(a0) ; RV32IA-ZACAS-NEXT: ret ; +; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_crossbb: +; RV32I-ZALRSC: # %bb.0: +; RV32I-ZALRSC-NEXT: mv a4, a0 +; RV32I-ZALRSC-NEXT: beqz a3, .LBB64_2 +; RV32I-ZALRSC-NEXT: # %bb.1: # %then +; RV32I-ZALRSC-NEXT: .LBB64_3: # %then +; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a0, (a4) +; RV32I-ZALRSC-NEXT: bne a0, a1, .LBB64_5 +; RV32I-ZALRSC-NEXT: # %bb.4: # %then +; RV32I-ZALRSC-NEXT: # in Loop: Header=BB64_3 Depth=1 +; RV32I-ZALRSC-NEXT: sc.w.rl a3, a2, (a4) +; RV32I-ZALRSC-NEXT: bnez a3, .LBB64_3 +; RV32I-ZALRSC-NEXT: .LBB64_5: # %then +; RV32I-ZALRSC-NEXT: ret +; RV32I-ZALRSC-NEXT: .LBB64_2: # %else +; RV32I-ZALRSC-NEXT: lw a0, 0(a4) +; RV32I-ZALRSC-NEXT: ret +; ; RV64I-LABEL: cmpxchg_i32_monotonic_crossbb: ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a3, .LBB64_2 @@ -5620,6 +8244,26 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3 ; RV64IA-ZACAS-NEXT: .LBB64_2: # %else ; RV64IA-ZACAS-NEXT: lw a0, 0(a0) ; RV64IA-ZACAS-NEXT: ret +; +; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_crossbb: +; RV64I-ZALRSC: # %bb.0: +; RV64I-ZALRSC-NEXT: beqz a3, .LBB64_2 +; RV64I-ZALRSC-NEXT: # %bb.1: # %then +; RV64I-ZALRSC-NEXT: .LBB64_3: # %then +; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) +; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB64_5 +; RV64I-ZALRSC-NEXT: # %bb.4: # %then +; RV64I-ZALRSC-NEXT: # in Loop: Header=BB64_3 Depth=1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a2, (a0) +; RV64I-ZALRSC-NEXT: bnez a4, .LBB64_3 +; RV64I-ZALRSC-NEXT: .LBB64_5: # %then +; RV64I-ZALRSC-NEXT: sext.w a0, a3 +; RV64I-ZALRSC-NEXT: ret +; RV64I-ZALRSC-NEXT: .LBB64_2: # %else +; RV64I-ZALRSC-NEXT: lw a3, 0(a0) +; RV64I-ZALRSC-NEXT: sext.w a0, a3 +; RV64I-ZALRSC-NEXT: ret br i1 %c, label %then, label %else then: diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll index cce1eda..1aee688 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} %"__cblayout_$Globals" = type <{ i32 }> @@ -9,7 +10,6 @@ ; CHECK: OpCapability Shader ; CHECK: OpCapability StorageTexelBufferArrayDynamicIndexingEXT - define void @main() local_unnamed_addr #0 { entry: %"$Globals.cb_h.i.i" = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 4, 0), 2, 0) @"llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_$Globalss_4_0t_2_0t"(i32 1, i32 0, i32 1, i32 0, ptr nonnull @"$Globals.str") @@ -19,4 +19,8 @@ entry: %2 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 98) store i32 99, ptr addrspace(11) %2, align 4 ret void -}
\ No newline at end of file +} + +!hlsl.cbs = !{!0} + +!0 = !{ptr @"$Globals.cb", ptr addrspace(12) @i} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll index da69a2f..163fc9d 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} %"__cblayout_$Globals" = type <{ i32 }> @@ -19,3 +20,7 @@ entry: store i32 98, ptr addrspace(11) %2, align 4 ret void } + +!hlsl.cbs = !{!0} + +!0 = !{ptr @"$Globals.cb", ptr addrspace(12) @i} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll new file mode 100644 index 0000000..7c44b6d --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll @@ -0,0 +1,43 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +; When accessing read-only `Buffer` types, SPIR-V should use `OpImageFetch` instead of `OpImageRead`. +; https://github.com/llvm/llvm-project/issues/162891 + +; CHECK-DAG: OpCapability SampledBuffer +; CHECK-DAG: OpCapability ImageBuffer +; CHECK-DAG: [[TypeInt:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[TypeImageBuffer:%[0-9]+]] = OpTypeImage [[TypeInt]] Buffer 2 0 0 1 Unknown +; CHECK-DAG: [[TypePtrImageBuffer:%[0-9]+]] = OpTypePointer UniformConstant [[TypeImageBuffer]] +; CHECK-DAG: [[TypeVector:%[0-9]+]] = OpTypeVector [[TypeInt]] 4 +; CHECK-DAG: [[Index:%[0-9]+]] = OpConstant [[TypeInt]] 98 +; CHECK-DAG: [[Variable:%[0-9]+]] = OpVariable [[TypePtrImageBuffer]] UniformConstant +@.str = private unnamed_addr constant [7 x i8] c"rwbuff\00", align 1 +@.str.2 = private unnamed_addr constant [5 x i8] c"buff\00", align 1 +@.str.4 = private unnamed_addr constant [8 x i8] c"unknown\00", align 1 + +define void @main() local_unnamed_addr #0 { + %1 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_2_33t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %2 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_1_0t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @.str.2) + %3 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 0, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_0_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @.str.4) + %4 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_1_0t(target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) %2, i32 98) +; CHECK: [[Load:%[0-9]+]] = OpLoad [[TypeImageBuffer]] [[Variable]] +; CHECK: [[ImageFetch:%[0-9]+]] = OpImageFetch [[TypeVector]] [[Load]] [[Index]] +; CHECK: {{.*}} = OpCompositeExtract [[TypeInt]] [[ImageFetch]] 0 + %5 = load i32, ptr addrspace(11) %4, align 4 + %6 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) 
%1, i32 99) + store i32 %5, ptr addrspace(11) %6, align 4 + %7 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 96) +; CHECK: {{%[0-9]+}} = OpLoad {{.*}} +; CHECK: {{%[0-9]+}} = OpImageRead {{.*}} + %8 = load i32, ptr addrspace(11) %7, align 4 + %9 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 97) + store i32 %8, ptr addrspace(11) %9, align 4 + %10 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_0_0t(target("spirv.Image", i32, 5, 2, 0, 0, 0, 0) %3, i32 94) +; CHECK: {{%[0-9]+}} = OpLoad {{.*}} +; CHECK: {{%[0-9]+}} = OpImageRead {{.*}} + %11 = load i32, ptr addrspace(11) %10, align 4 + %12 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 95) + store i32 %11, ptr addrspace(11) %12, align 4 + ret void +} diff --git a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll new file mode 100644 index 0000000..13149d7 --- /dev/null +++ b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll @@ -0,0 +1,3021 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=CHECK,AVX512 + +; +; CTPOP +; + +define i32 @test_ctpop_i128(i128 %a0) nounwind { +; CHECK-LABEL: test_ctpop_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq %rsi, %rcx +; CHECK-NEXT: popcntq %rdi, %rax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %cnt = call i128 @llvm.ctpop.i128(i128 %a0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i128(ptr %p0) nounwind { +; CHECK-LABEL: load_ctpop_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq 8(%rdi), %rcx +; CHECK-NEXT: popcntq (%rdi), %rax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %a0 = load i128, ptr %p0 + %cnt = call i128 @llvm.ctpop.i128(i128 %a0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctpop_i256(i256 %a0) nounwind { +; CHECK-LABEL: test_ctpop_i256: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq %rcx, %rax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: popcntq %rdx, %rcx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: popcntq %rsi, %rdx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %rdi, %rax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %cnt = call i256 @llvm.ctpop.i256(i256 %a0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i256(ptr %p0) nounwind { +; SSE-LABEL: load_ctpop_i256: +; SSE: # %bb.0: +; SSE-NEXT: popcntq 24(%rdi), %rcx +; SSE-NEXT: popcntq 16(%rdi), %rdx 
+; SSE-NEXT: popcntq 8(%rdi), %rsi +; SSE-NEXT: popcntq (%rdi), %rax +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: addl %esi, %eax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctpop_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: popcntq 24(%rdi), %rax +; AVX2-NEXT: popcntq 16(%rdi), %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: popcntq 8(%rdi), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq (%rdi), %rax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: addl %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctpop_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: popcntq 24(%rdi), %rax +; AVX512-NEXT: popcntq 16(%rdi), %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: popcntq 8(%rdi), %rdx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq (%rdi), %rax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i256, ptr %p0 + %cnt = call i256 @llvm.ctpop.i256(i256 %a0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctpop_i512(i512 %a0) nounwind { +; CHECK-LABEL: test_ctpop_i512: +; CHECK: # %bb.0: +; CHECK-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: addl %eax, %r10d +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %r9, %rax +; CHECK-NEXT: popcntq %r8, %r8 +; CHECK-NEXT: addl %eax, %r8d +; CHECK-NEXT: addl %r10d, %r8d +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %rcx, %rax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: popcntq %rdx, %rcx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: popcntq %rsi, %rdx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popcntq %rdi, %rax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %r8d, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %cnt = call i512 @llvm.ctpop.i512(i512 %a0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i512(ptr %p0) nounwind { +; SSE-LABEL: load_ctpop_i512: +; SSE: # %bb.0: +; SSE-NEXT: popcntq 56(%rdi), %rax +; SSE-NEXT: popcntq 48(%rdi), %rcx +; SSE-NEXT: popcntq 40(%rdi), %rdx +; SSE-NEXT: popcntq 32(%rdi), %rsi +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 24(%rdi), %rax +; SSE-NEXT: addl %ecx, %esi +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: popcntq 16(%rdi), %rcx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 8(%rdi), %rdx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq (%rdi), %rax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: addl %ecx, %eax +; SSE-NEXT: addl %esi, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctpop_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: popcntq 56(%rdi), %rax +; AVX2-NEXT: popcntq 48(%rdi), %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 40(%rdi), %rax +; AVX2-NEXT: popcntq 32(%rdi), %rdx +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: addl %ecx, %edx +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: popcntq 24(%rdi), %rcx +; AVX2-NEXT: popcntq 16(%rdi), %rsi +; AVX2-NEXT: popcntq 8(%rdi), %r8 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq (%rdi), %rax +; AVX2-NEXT: addl %ecx, %esi +; AVX2-NEXT: addl %r8d, %eax +; AVX2-NEXT: addl %esi, %eax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: # kill: def $eax 
killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctpop_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: popcntq 56(%rdi), %rax +; AVX512-NEXT: popcntq 48(%rdi), %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 40(%rdi), %rax +; AVX512-NEXT: popcntq 32(%rdi), %rdx +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: addl %ecx, %edx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 24(%rdi), %rax +; AVX512-NEXT: xorl %ecx, %ecx +; AVX512-NEXT: popcntq 16(%rdi), %rcx +; AVX512-NEXT: popcntq 8(%rdi), %rsi +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq (%rdi), %rax +; AVX512-NEXT: addl %esi, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i512, ptr %p0 + %cnt = call i512 @llvm.ctpop.i512(i512 %a0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctpop_i1024(i1024 %a0) nounwind { +; SSE-LABEL: test_ctpop_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbx +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: addl %eax, %r10d +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: addl %r11d, %eax +; SSE-NEXT: xorl %r11d, %r11d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: xorl %ebx, %ebx +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: addl %r10d, %eax +; SSE-NEXT: addl %r11d, %ebx +; SSE-NEXT: xorl %r11d, %r11d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: xorl %r10d, %r10d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: addl %r11d, %r10d +; SSE-NEXT: addl %ebx, %r10d +; SSE-NEXT: xorl %r11d, %r11d +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: xorl %ebx, %ebx +; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: addl %eax, %r10d +; SSE-NEXT: addl %r11d, %ebx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq %r9, %rax +; SSE-NEXT: popcntq %r8, %r8 +; SSE-NEXT: addl %eax, %r8d +; SSE-NEXT: addl %ebx, %r8d +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq %rcx, %rax +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: popcntq %rdx, %rcx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq %rsi, %rdx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq %rdi, %rax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: addl %ecx, %eax +; SSE-NEXT: addl %r8d, %eax +; SSE-NEXT: addl %r10d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctpop_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: addl %eax, %r10d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: addl %eax, %r11d +; AVX2-NEXT: addl %r10d, %r11d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r14 +; AVX2-NEXT: addl %eax, %ebx +; AVX2-NEXT: xorl %r10d, %r10d +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: addl %r14d, %r10d +; AVX2-NEXT: addl %ebx, %r10d +; AVX2-NEXT: addl %r11d, %r10d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: xorl %r11d, 
%r11d +; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: addl %eax, %r11d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq %r9, %rax +; AVX2-NEXT: popcntq %r8, %r8 +; AVX2-NEXT: addl %eax, %r8d +; AVX2-NEXT: addl %r11d, %r8d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq %rcx, %rax +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: popcntq %rdx, %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: popcntq %rsi, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq %rdi, %rax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: addl %ecx, %eax +; AVX2-NEXT: addl %r8d, %eax +; AVX2-NEXT: addl %r10d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctpop_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: addl %eax, %r10d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: addl %eax, %r11d +; AVX512-NEXT: addl %r10d, %r11d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: xorl %ebx, %ebx +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: xorl %r14d, %r14d +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r14 +; AVX512-NEXT: addl %eax, %ebx +; AVX512-NEXT: xorl %r10d, %r10d +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: addl %r14d, %r10d +; AVX512-NEXT: addl %ebx, %r10d +; AVX512-NEXT: addl %r11d, %r10d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: xorl %r11d, %r11d +; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: addl %eax, %r11d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq %r9, %rax +; AVX512-NEXT: popcntq %r8, %r8 +; AVX512-NEXT: addl %eax, %r8d +; AVX512-NEXT: addl %r11d, %r8d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq %rcx, %rax +; AVX512-NEXT: xorl %ecx, %ecx +; AVX512-NEXT: popcntq %rdx, %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: popcntq %rsi, %rdx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq %rdi, %rax +; AVX512-NEXT: addl %edx, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: addl %r8d, %eax +; AVX512-NEXT: addl %r10d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %cnt = call i1024 @llvm.ctpop.i1024(i1024 %a0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctpop_i1024(ptr %p0) nounwind { +; SSE-LABEL: load_ctpop_i1024: +; SSE: # %bb.0: +; SSE-NEXT: popcntq 120(%rdi), %rax +; SSE-NEXT: popcntq 112(%rdi), %rcx +; SSE-NEXT: popcntq 104(%rdi), %rdx +; SSE-NEXT: popcntq 96(%rdi), %rsi +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 88(%rdi), %rax +; SSE-NEXT: addl %ecx, %esi +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 80(%rdi), %rdx +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 72(%rdi), %rax +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: popcntq 64(%rdi), %rcx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: addl %edx, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 56(%rdi), %rax +; SSE-NEXT: addl %esi, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 48(%rdi), %rdx +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: xorl 
%eax, %eax +; SSE-NEXT: popcntq 40(%rdi), %rax +; SSE-NEXT: xorl %esi, %esi +; SSE-NEXT: popcntq 32(%rdi), %rsi +; SSE-NEXT: addl %eax, %esi +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq 24(%rdi), %rax +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: popcntq 16(%rdi), %rdx +; SSE-NEXT: popcntq 8(%rdi), %r8 +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: popcntq (%rdi), %rax +; SSE-NEXT: addl %r8d, %eax +; SSE-NEXT: addl %edx, %eax +; SSE-NEXT: addl %esi, %eax +; SSE-NEXT: addl %ecx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctpop_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: popcntq 120(%rdi), %rax +; AVX2-NEXT: popcntq 112(%rdi), %rcx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 104(%rdi), %rax +; AVX2-NEXT: popcntq 96(%rdi), %rdx +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: addl %ecx, %edx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 88(%rdi), %rax +; AVX2-NEXT: popcntq 80(%rdi), %rsi +; AVX2-NEXT: popcntq 72(%rdi), %r8 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: popcntq 64(%rdi), %rcx +; AVX2-NEXT: addl %eax, %esi +; AVX2-NEXT: addl %r8d, %ecx +; AVX2-NEXT: addl %esi, %ecx +; AVX2-NEXT: addl %edx, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 56(%rdi), %rax +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: popcntq 48(%rdi), %rdx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: popcntq 40(%rdi), %rsi +; AVX2-NEXT: xorl %r8d, %r8d +; AVX2-NEXT: popcntq 32(%rdi), %r8 +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: addl %esi, %r8d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq 24(%rdi), %rax +; AVX2-NEXT: addl %edx, %r8d +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: popcntq 16(%rdi), %rdx +; AVX2-NEXT: addl %eax, %edx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: popcntq 8(%rdi), %rsi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: popcntq (%rdi), %rax +; AVX2-NEXT: addl %esi, %eax +; AVX2-NEXT: addl %edx, %eax +; AVX2-NEXT: addl %r8d, %eax +; AVX2-NEXT: addl %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctpop_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: popcntq 120(%rdi), %rax +; AVX512-NEXT: popcntq 112(%rdi), %rcx +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 104(%rdi), %rax +; AVX512-NEXT: popcntq 96(%rdi), %rdx +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: addl %ecx, %edx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 88(%rdi), %rax +; AVX512-NEXT: popcntq 80(%rdi), %rsi +; AVX512-NEXT: popcntq 72(%rdi), %r8 +; AVX512-NEXT: addl %eax, %esi +; AVX512-NEXT: xorl %ecx, %ecx +; AVX512-NEXT: popcntq 64(%rdi), %rcx +; AVX512-NEXT: addl %r8d, %ecx +; AVX512-NEXT: addl %esi, %ecx +; AVX512-NEXT: addl %edx, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 56(%rdi), %rax +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: popcntq 48(%rdi), %rdx +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: popcntq 40(%rdi), %rsi +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: xorl %r8d, %r8d +; AVX512-NEXT: popcntq 32(%rdi), %r8 +; AVX512-NEXT: addl %esi, %r8d +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq 24(%rdi), %rax +; AVX512-NEXT: addl %edx, %r8d +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: popcntq 16(%rdi), %rdx +; AVX512-NEXT: addl %eax, %edx +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: popcntq 8(%rdi), %rsi +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: popcntq (%rdi), %rax +; AVX512-NEXT: addl %esi, %eax +; AVX512-NEXT: addl %edx, 
%eax +; AVX512-NEXT: addl %r8d, %eax +; AVX512-NEXT: addl %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i1024, ptr %p0 + %cnt = call i1024 @llvm.ctpop.i1024(i1024 %a0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +; +; CTLZ +; + +define i32 @test_ctlz_i128(i128 %a0) nounwind { +; SSE-LABEL: test_ctlz_i128: +; SSE: # %bb.0: +; SSE-NEXT: bsrq %rsi, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: lzcntq %rsi, %rcx +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: lzcntq %rsi, %rcx +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i128(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i128: +; SSE: # %bb.0: +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: bsrq %rcx, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movq 8(%rdi), %rcx +; AVX2-NEXT: lzcntq %rcx, %rdx +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movq 8(%rdi), %rcx +; AVX512-NEXT: lzcntq %rcx, %rdx +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i128, ptr %p0 + %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctlz_i256(i256 %a0) nounwind { +; SSE-LABEL: test_ctlz_i256: +; SSE: # %bb.0: +; SSE-NEXT: bsrq %rcx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rdx, %r8 +; SSE-NEXT: xorl $63, %r8d +; SSE-NEXT: orl $64, %r8d +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %eax, %r8d +; SSE-NEXT: bsrq %rsi, %r9 +; SSE-NEXT: xorl $63, %r9d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmovnel %r8d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: lzcntq %rcx, %rax +; AVX2-NEXT: lzcntq %rdx, %r8 +; AVX2-NEXT: addl $64, %r8d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %r8d +; AVX2-NEXT: lzcntq %rsi, %r9 +; AVX2-NEXT: xorl 
%eax, %eax +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: cmovnel %r8d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: lzcntq %rcx, %rax +; AVX512-NEXT: lzcntq %rdx, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: lzcntq %rsi, %r9 +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rcx, %rdx +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i256(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i256: +; SSE: # %bb.0: +; SSE-NEXT: movq 16(%rdi), %rcx +; SSE-NEXT: movq 24(%rdi), %rdx +; SSE-NEXT: bsrq %rdx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rcx, %rsi +; SSE-NEXT: xorl $63, %esi +; SSE-NEXT: orl $64, %esi +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %esi +; SSE-NEXT: movq 8(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %r9 +; SSE-NEXT: xorl $63, %r9d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: cmovnel %esi, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: movq 16(%rdi), %rcx +; AVX2-NEXT: movq 24(%rdi), %rdx +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: lzcntq %rcx, %rsi +; AVX2-NEXT: addl $64, %esi +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %esi +; AVX2-NEXT: movq 8(%rdi), %r8 +; AVX2-NEXT: lzcntq %r8, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: movq 8(%rdi), %rcx +; AVX512-NEXT: movq 16(%rdi), %rdx +; AVX512-NEXT: movq 24(%rdi), %rsi +; AVX512-NEXT: lzcntq %rsi, %rax +; AVX512-NEXT: lzcntq %rdx, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: lzcntq %rcx, %r9 +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rsi, %rdx +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i256, ptr %p0 + %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctlz_i512(i512 %a0) nounwind { +; SSE-LABEL: test_ctlz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: bsrq %r11, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r10, 
%r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %r9, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r8, %rbx +; SSE-NEXT: xorl $63, %ebx +; SSE-NEXT: orl $64, %ebx +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: cmovnel %eax, %ebx +; SSE-NEXT: subl $-128, %ebx +; SSE-NEXT: movq %r10, %rax +; SSE-NEXT: orq %r11, %rax +; SSE-NEXT: cmovnel %r14d, %ebx +; SSE-NEXT: bsrq %rcx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rdx, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %rsi, %r15 +; SSE-NEXT: xorl $63, %r15d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r11, %r9 +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: orq %r9, %r8 +; SSE-NEXT: cmovnel %ebx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: lzcntq %r11, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: lzcntq %r10, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %r11, %r11 +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %r9, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: lzcntq %r8, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %r9, %r9 +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: subl $-128, %ebx +; AVX2-NEXT: movq %r10, %rax +; AVX2-NEXT: orq %r11, %rax +; AVX2-NEXT: cmovnel %r14d, %ebx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rcx, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: lzcntq %rdx, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: lzcntq %rsi, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r11, %r9 +; AVX2-NEXT: orq %r10, %r8 +; AVX2-NEXT: orq %r9, %r8 +; AVX2-NEXT: cmovnel %ebx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: lzcntq %r11, %rax +; AVX512-NEXT: lzcntq %r10, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq %r9, %rax +; AVX512-NEXT: lzcntq %r8, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: subl $-128, %ebx +; AVX512-NEXT: movq %r10, %rax +; AVX512-NEXT: orq %r11, %rax +; AVX512-NEXT: cmovnel %r14d, %ebx +; 
AVX512-NEXT: lzcntq %rcx, %rax +; AVX512-NEXT: lzcntq %rdx, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq %rsi, %r15 +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %r15d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rcx, %rdx +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r11, %r9 +; AVX512-NEXT: orq %r10, %r8 +; AVX512-NEXT: orq %r9, %r8 +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: retq + %cnt = call i512 @llvm.ctlz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i512(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 8(%rdi), %r10 +; SSE-NEXT: movq 16(%rdi), %r9 +; SSE-NEXT: movq 32(%rdi), %rcx +; SSE-NEXT: movq 40(%rdi), %rdx +; SSE-NEXT: movq 48(%rdi), %rsi +; SSE-NEXT: movq 56(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rsi, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %rdx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %rcx, %r11 +; SSE-NEXT: xorl $63, %r11d +; SSE-NEXT: orl $64, %r11d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: movq 24(%rdi), %rbx +; SSE-NEXT: subl $-128, %r11d +; SSE-NEXT: movq %rsi, %rax +; SSE-NEXT: orq %r8, %rax +; SSE-NEXT: cmovnel %r14d, %r11d +; SSE-NEXT: bsrq %rbx, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r9, %r14 +; SSE-NEXT: xorl $63, %r14d +; SSE-NEXT: orl $64, %r14d +; SSE-NEXT: testq %rbx, %rbx +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: bsrq %r10, %r15 +; SSE-NEXT: xorl $63, %r15d +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rbx, %r9 +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: orq %rsi, %rcx +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: cmovnel %r11d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 8(%rdi), %r10 +; AVX2-NEXT: movq 16(%rdi), %r9 +; AVX2-NEXT: movq 32(%rdi), %rcx +; AVX2-NEXT: movq 40(%rdi), %rdx +; AVX2-NEXT: movq 48(%rdi), %rsi +; AVX2-NEXT: movq 56(%rdi), %r8 +; AVX2-NEXT: lzcntq %r8, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: lzcntq %rsi, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: lzcntq %rcx, %r11 +; AVX2-NEXT: addl $64, %r11d +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %r11d +; AVX2-NEXT: subl $-128, %r11d +; AVX2-NEXT: movq %rsi, %rax +; AVX2-NEXT: orq %r8, %rax +; AVX2-NEXT: cmovnel %ebx, %r11d +; AVX2-NEXT: movq 24(%rdi), %rbx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rbx, %rax +; AVX2-NEXT: 
xorl %r14d, %r14d +; AVX2-NEXT: lzcntq %r9, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: lzcntq %r10, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rbx, %r9 +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r8, %rdx +; AVX2-NEXT: orq %rsi, %rcx +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: cmovnel %r11d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 8(%rdi), %r11 +; AVX512-NEXT: movq 16(%rdi), %r9 +; AVX512-NEXT: movq 24(%rdi), %r10 +; AVX512-NEXT: movq 32(%rdi), %rcx +; AVX512-NEXT: movq 40(%rdi), %rdx +; AVX512-NEXT: movq 48(%rdi), %rsi +; AVX512-NEXT: movq 56(%rdi), %r8 +; AVX512-NEXT: lzcntq %r8, %rax +; AVX512-NEXT: lzcntq %rsi, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq %rdx, %rax +; AVX512-NEXT: lzcntq %rcx, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: subl $-128, %ebx +; AVX512-NEXT: movq %rsi, %rax +; AVX512-NEXT: orq %r8, %rax +; AVX512-NEXT: cmovnel %r14d, %ebx +; AVX512-NEXT: lzcntq %r10, %rax +; AVX512-NEXT: lzcntq %r9, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: lzcntq %r11, %rdi +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r10, %r9 +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r8, %rdx +; AVX512-NEXT: orq %rsi, %rcx +; AVX512-NEXT: orq %rdx, %rcx +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %a0 = load i512, ptr %p0 + %cnt = call i512 @llvm.ctlz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @test_ctlz_i1024(i1024 %a0) nounwind { +; SSE-LABEL: test_ctlz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq %r9, %r11 +; SSE-NEXT: movq %r8, %r9 +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq %rdx, %r12 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: bsrq %r8, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r15, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: bsrq %r14, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: bsrq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: orl $64, %eax +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: 
movq %r15, %rdx +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: movq %r8, %r14 +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: bsrq %r13, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: bsrq %rbx, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: orl $64, %edx +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %ecx, %edx +; SSE-NEXT: bsrq %r10, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: bsrq %r8, %rbp +; SSE-NEXT: xorl $63, %ebp +; SSE-NEXT: orl $64, %ebp +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %ecx, %ebp +; SSE-NEXT: subl $-128, %ebp +; SSE-NEXT: movq %rbx, %rcx +; SSE-NEXT: orq %r13, %rcx +; SSE-NEXT: cmovnel %edx, %ebp +; SSE-NEXT: addl $256, %ebp # imm = 0x100 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; SSE-NEXT: orq %r14, %rcx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: orq %r15, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmovnel %eax, %ebp +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; SSE-NEXT: bsrq %r14, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; SSE-NEXT: bsrq %r15, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: bsrq %r11, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r9, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: orl $64, %edx +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %eax, %edx +; SSE-NEXT: subl $-128, %edx +; SSE-NEXT: movq %r15, %rax +; SSE-NEXT: orq %r14, %rax +; SSE-NEXT: cmovnel %ecx, %edx +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; SSE-NEXT: bsrq %r15, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r12, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r15, %r15 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq %rdi, %rax +; SSE-NEXT: bsrq %rsi, %rdi +; SSE-NEXT: xorl $63, %edi +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %edi, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r15, %r12 +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq %r14, %r11 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r11, %r9 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: orq %r13, %r10 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: orq %rbx, %r8 +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: cmovnel %ebp, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: test_ctlz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq %r9, %r14 +; AVX2-NEXT: movq %r8, %r11 +; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: 
lzcntq %r12, %rcx +; AVX2-NEXT: xorl %r9d, %r9d +; AVX2-NEXT: lzcntq %r8, %r9 +; AVX2-NEXT: addl $64, %r9d +; AVX2-NEXT: testq %r12, %r12 +; AVX2-NEXT: cmovnel %ecx, %r9d +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: lzcntq %r10, %rsi +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %rax, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %esi, %ecx +; AVX2-NEXT: subl $-128, %ecx +; AVX2-NEXT: movq %r8, %rsi +; AVX2-NEXT: orq %r12, %rsi +; AVX2-NEXT: cmovnel %r9d, %ecx +; AVX2-NEXT: xorl %edi, %edi +; AVX2-NEXT: lzcntq %rbx, %rdi +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: lzcntq %r15, %rsi +; AVX2-NEXT: addl $64, %esi +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %edi, %esi +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: lzcntq %r13, %rbp +; AVX2-NEXT: addl $64, %ebp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; AVX2-NEXT: xorl %edi, %edi +; AVX2-NEXT: lzcntq %r9, %rdi +; AVX2-NEXT: testq %r9, %r9 +; AVX2-NEXT: cmovnel %edi, %ebp +; AVX2-NEXT: subl $-128, %ebp +; AVX2-NEXT: movq %r15, %rdi +; AVX2-NEXT: orq %rbx, %rdi +; AVX2-NEXT: cmovnel %esi, %ebp +; AVX2-NEXT: addl $256, %ebp # imm = 0x100 +; AVX2-NEXT: movq %r10, %rdi +; AVX2-NEXT: orq %r12, %rdi +; AVX2-NEXT: movq %rax, %rsi +; AVX2-NEXT: orq %r8, %rsi +; AVX2-NEXT: orq %rdi, %rsi +; AVX2-NEXT: cmovnel %ecx, %ebp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r12, %rcx +; AVX2-NEXT: testq %r12, %r12 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r11, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: lzcntq %r14, %rsi +; AVX2-NEXT: testq %r14, %r14 +; AVX2-NEXT: cmovnel %esi, %ecx +; AVX2-NEXT: subl $-128, %ecx +; AVX2-NEXT: movq %rdi, %rsi +; AVX2-NEXT: orq %r12, %rsi +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: movq %rdx, %rdi +; AVX2-NEXT: lzcntq %rdx, %rdx +; AVX2-NEXT: addl $64, %edx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %r10, %rax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %eax, %edx +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: lzcntq %rax, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; AVX2-NEXT: lzcntq %rsi, %r8 +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %r8d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r10, %rdi +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: orq %r12, %r14 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r14, %r11 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; AVX2-NEXT: orq %rbx, %r9 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r15 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: orq %r15, %r13 +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r9, %r13 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_ctlz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq 
%r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq %r9, %r14 +; AVX512-NEXT: movq %r8, %r11 +; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: lzcntq %r12, %rcx +; AVX512-NEXT: lzcntq %r8, %r9 +; AVX512-NEXT: addl $64, %r9d +; AVX512-NEXT: testq %r12, %r12 +; AVX512-NEXT: cmovnel %ecx, %r9d +; AVX512-NEXT: lzcntq %r10, %rsi +; AVX512-NEXT: lzcntq %rax, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %esi, %ecx +; AVX512-NEXT: subl $-128, %ecx +; AVX512-NEXT: movq %r8, %rsi +; AVX512-NEXT: orq %r12, %rsi +; AVX512-NEXT: cmovnel %r9d, %ecx +; AVX512-NEXT: lzcntq %rbx, %rdi +; AVX512-NEXT: lzcntq %r15, %rsi +; AVX512-NEXT: addl $64, %esi +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %edi, %esi +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: lzcntq %r13, %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; AVX512-NEXT: lzcntq %r9, %rdi +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: cmovnel %edi, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %r15, %rdi +; AVX512-NEXT: orq %rbx, %rdi +; AVX512-NEXT: cmovnel %esi, %ebp +; AVX512-NEXT: addl $256, %ebp # imm = 0x100 +; AVX512-NEXT: movq %r10, %rdi +; AVX512-NEXT: orq %r12, %rdi +; AVX512-NEXT: movq %rax, %rsi +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rdi, %rsi +; AVX512-NEXT: cmovnel %ecx, %ebp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; AVX512-NEXT: lzcntq %rdi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: lzcntq %r12, %rcx +; AVX512-NEXT: testq %r12, %r12 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: lzcntq %r11, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: lzcntq %r14, %rsi +; AVX512-NEXT: testq %r14, %r14 +; AVX512-NEXT: cmovnel %esi, %ecx +; AVX512-NEXT: subl $-128, %ecx +; AVX512-NEXT: movq %rdi, %rsi +; AVX512-NEXT: orq %r12, %rsi +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: movq %rdx, %rdi +; AVX512-NEXT: lzcntq %rdx, %rdx +; AVX512-NEXT: addl $64, %edx +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX512-NEXT: lzcntq %r10, %rax +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %eax, %edx +; AVX512-NEXT: lzcntq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; AVX512-NEXT: lzcntq %rsi, %r8 +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r10, %rdi +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: orq %r12, %r14 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r14, %r11 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; AVX512-NEXT: orq %rbx, %r9 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r15 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: orq %r15, %r13 +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r9, %r13 +; AVX512-NEXT: cmovnel %ebp, 
%eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %cnt = call i1024 @llvm.ctlz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +define i32 @load_ctlz_i1024(ptr %p0) nounwind { +; SSE-LABEL: load_ctlz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 40(%rdi), %rbp +; SSE-NEXT: movq 64(%rdi), %rbx +; SSE-NEXT: movq 72(%rdi), %r11 +; SSE-NEXT: movq 80(%rdi), %r12 +; SSE-NEXT: movq 88(%rdi), %r14 +; SSE-NEXT: movq 96(%rdi), %rsi +; SSE-NEXT: movq 104(%rdi), %r9 +; SSE-NEXT: movq 112(%rdi), %r10 +; SSE-NEXT: movq 120(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r10, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: bsrq %r9, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: bsrq %rsi, %rax +; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: orl $64, %eax +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %r10, %rdx +; SSE-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: orq %r8, %rdx +; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: bsrq %r14, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: bsrq %r12, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: orl $64, %edx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %ecx, %edx +; SSE-NEXT: bsrq %r11, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: bsrq %rbx, %r15 +; SSE-NEXT: xorl $63, %r15d +; SSE-NEXT: orl $64, %r15d +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %ecx, %r15d +; SSE-NEXT: subl $-128, %r15d +; SSE-NEXT: movq %r12, %rcx +; SSE-NEXT: orq %r14, %rcx +; SSE-NEXT: cmovnel %edx, %r15d +; SSE-NEXT: movq 48(%rdi), %r12 +; SSE-NEXT: addl $256, %r15d # imm = 0x100 +; SSE-NEXT: movq %r9, %rcx +; SSE-NEXT: orq %r8, %rcx +; SSE-NEXT: movq %rsi, %rdx +; SSE-NEXT: orq %r10, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: movq 56(%rdi), %r13 +; SSE-NEXT: cmovnel %eax, %r15d +; SSE-NEXT: bsrq %r13, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: bsrq %r12, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movq %rbp, %r10 +; SSE-NEXT: bsrq %rbp, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: movq 32(%rdi), %r8 +; SSE-NEXT: bsrq %r8, %rbp +; SSE-NEXT: xorl $63, %ebp +; SSE-NEXT: orl $64, %ebp +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %eax, %ebp +; SSE-NEXT: subl $-128, %ebp +; SSE-NEXT: movq %r12, %rax +; SSE-NEXT: orq %r13, %rax +; SSE-NEXT: cmovnel %ecx, %ebp +; SSE-NEXT: movq 24(%rdi), %r9 +; SSE-NEXT: bsrq %r9, %rax +; SSE-NEXT: xorl $63, %eax +; SSE-NEXT: movq 16(%rdi), %rsi +; SSE-NEXT: bsrq %rsi, %rcx +; SSE-NEXT: xorl $63, %ecx +; SSE-NEXT: orl $64, %ecx +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $127, %eax +; SSE-NEXT: bsrq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rdi +; SSE-NEXT: bsrq %rdi, %rdx +; SSE-NEXT: xorl $63, %edx +; SSE-NEXT: xorl 
$63, %eax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %rsi +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq %r13, %r10 +; SSE-NEXT: orq %r12, %r8 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r10, %r8 +; SSE-NEXT: cmovnel %ebp, %eax +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; SSE-NEXT: orq %r14, %r11 +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; SSE-NEXT: orq %rcx, %rbx +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r11, %rbx +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: load_ctlz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 48(%rdi), %r9 +; AVX2-NEXT: movq 56(%rdi), %rbp +; AVX2-NEXT: movq 64(%rdi), %r11 +; AVX2-NEXT: movq 72(%rdi), %r10 +; AVX2-NEXT: movq 80(%rdi), %r14 +; AVX2-NEXT: movq 88(%rdi), %rbx +; AVX2-NEXT: movq 96(%rdi), %rdx +; AVX2-NEXT: movq 104(%rdi), %r8 +; AVX2-NEXT: movq 112(%rdi), %rsi +; AVX2-NEXT: movq 120(%rdi), %r15 +; AVX2-NEXT: lzcntq %r15, %rax +; AVX2-NEXT: lzcntq %rsi, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: testq %r15, %r15 +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: lzcntq %r8, %r12 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: movq %rsi, %r12 +; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: orq %r15, %r12 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %rbx, %rcx +; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: lzcntq %r14, %r13 +; AVX2-NEXT: addl $64, %r13d +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %ecx, %r13d +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r10, %rcx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: lzcntq %r11, %r12 +; AVX2-NEXT: addl $64, %r12d +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %ecx, %r12d +; AVX2-NEXT: subl $-128, %r12d +; AVX2-NEXT: movq %r14, %rcx +; AVX2-NEXT: orq %rbx, %rcx +; AVX2-NEXT: cmovnel %r13d, %r12d +; AVX2-NEXT: addl $256, %r12d # imm = 0x100 +; AVX2-NEXT: movq %r8, %rcx +; AVX2-NEXT: orq %r15, %rcx +; AVX2-NEXT: orq %rsi, %rdx +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: cmovnel %eax, %r12d +; AVX2-NEXT: movq %rbp, %r14 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %rbp, %rcx +; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %r9, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rbp, %rbp +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: movq 32(%rdi), %r13 +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: lzcntq %r13, %rbp +; AVX2-NEXT: addl 
$64, %ebp +; AVX2-NEXT: movq 40(%rdi), %r8 +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: lzcntq %r8, %rdx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %edx, %ebp +; AVX2-NEXT: subl $-128, %ebp +; AVX2-NEXT: movq %r9, %rdx +; AVX2-NEXT: orq %r14, %rdx +; AVX2-NEXT: cmovnel %eax, %ebp +; AVX2-NEXT: movq 16(%rdi), %r9 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: lzcntq %r9, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: movq 24(%rdi), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq %rdx, %rax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: movq 8(%rdi), %rsi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: lzcntq (%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: lzcntq %rsi, %rdi +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %edi, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rdx, %r9 +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq %r14, %r8 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r8, %r13 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: orq %r15, %rbx +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; AVX2-NEXT: orq %rbx, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX2-NEXT: orq %rcx, %r11 +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r10, %r11 +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_ctlz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 32(%rdi), %r14 +; AVX512-NEXT: movq 48(%rdi), %rbp +; AVX512-NEXT: movq 64(%rdi), %r11 +; AVX512-NEXT: movq 72(%rdi), %r10 +; AVX512-NEXT: movq 80(%rdi), %rdx +; AVX512-NEXT: movq 88(%rdi), %rbx +; AVX512-NEXT: movq 96(%rdi), %rsi +; AVX512-NEXT: movq 104(%rdi), %r9 +; AVX512-NEXT: movq 112(%rdi), %r8 +; AVX512-NEXT: movq 120(%rdi), %r15 +; AVX512-NEXT: lzcntq %r15, %rax +; AVX512-NEXT: lzcntq %r8, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: testq %r15, %r15 +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: lzcntq %r9, %r12 +; AVX512-NEXT: lzcntq %rsi, %rax +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: movq %r8, %r12 +; AVX512-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: orq %r15, %r12 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: lzcntq %rbx, %rcx +; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: lzcntq %rdx, %r13 +; AVX512-NEXT: addl $64, %r13d +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %ecx, %r13d +; AVX512-NEXT: lzcntq %r10, %rcx +; AVX512-NEXT: lzcntq %r11, %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %ecx, %r12d +; AVX512-NEXT: subl $-128, %r12d +; AVX512-NEXT: movq %rdx, %rcx +; AVX512-NEXT: orq %rbx, %rcx +; AVX512-NEXT: cmovnel 
%r13d, %r12d +; AVX512-NEXT: addl $256, %r12d # imm = 0x100 +; AVX512-NEXT: movq %r9, %rcx +; AVX512-NEXT: orq %r15, %rcx +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rcx, %rsi +; AVX512-NEXT: movq 56(%rdi), %r13 +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: lzcntq %r13, %rcx +; AVX512-NEXT: movq %rbp, %rsi +; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: lzcntq %rbp, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r13, %r13 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: lzcntq %r14, %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: movq 40(%rdi), %r8 +; AVX512-NEXT: lzcntq %r8, %rdx +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %edx, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %rsi, %rdx +; AVX512-NEXT: orq %r13, %rdx +; AVX512-NEXT: cmovnel %eax, %ebp +; AVX512-NEXT: movq 16(%rdi), %r9 +; AVX512-NEXT: lzcntq %r9, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: movq 24(%rdi), %rdx +; AVX512-NEXT: lzcntq %rdx, %rax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: movq 8(%rdi), %rsi +; AVX512-NEXT: lzcntq (%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: lzcntq %rsi, %rdi +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rdx, %r9 +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq %r13, %r8 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r8, %r14 +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: orq %r15, %rbx +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; AVX512-NEXT: orq %rbx, %r10 +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX512-NEXT: orq %rcx, %r11 +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r10, %r11 +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %a0 = load i1024, ptr %p0 + %cnt = call i1024 @llvm.ctlz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +; +; CTTZ +; + +define i32 @test_cttz_i128(i128 %a0) nounwind { +; SSE-LABEL: test_cttz_i128: +; SSE: # %bb.0: +; SSE-NEXT: rep bsfq %rdi, %rcx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq %rsi, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: tzcntq %rdi, %rcx +; AVX2-NEXT: tzcntq %rsi, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: tzcntq %rdi, %rcx +; AVX512-NEXT: tzcntq %rsi, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i128 @llvm.cttz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 
@load_cttz_i128(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i128: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: rep bsfq %rcx, %rdx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 8(%rdi), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: tzcntq %rcx, %rdx +; AVX2-NEXT: tzcntq 8(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movq (%rdi), %rcx +; AVX512-NEXT: tzcntq %rcx, %rdx +; AVX512-NEXT: tzcntq 8(%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i128, ptr %p0 + %cnt = call i128 @llvm.cttz.i128(i128 %a0, i1 0) + %res = trunc i128 %cnt to i32 + ret i32 %res +} + +define i32 @test_cttz_i256(i256 %a0) nounwind { +; SSE-LABEL: test_cttz_i256: +; SSE: # %bb.0: +; SSE-NEXT: rep bsfq %rdi, %rax +; SSE-NEXT: rep bsfq %rsi, %r8 +; SSE-NEXT: addl $64, %r8d +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %eax, %r8d +; SSE-NEXT: rep bsfq %rdx, %r9 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rsi, %rdi +; SSE-NEXT: cmovnel %r8d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: tzcntq %rdi, %rax +; AVX2-NEXT: tzcntq %rsi, %r8 +; AVX2-NEXT: addl $64, %r8d +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %eax, %r8d +; AVX2-NEXT: tzcntq %rdx, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rsi, %rdi +; AVX2-NEXT: cmovnel %r8d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: tzcntq %rdi, %rax +; AVX512-NEXT: tzcntq %rsi, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: tzcntq %rdx, %r9 +; AVX512-NEXT: tzcntq %rcx, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rsi, %rdi +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %cnt = call i256 @llvm.cttz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i256(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i256: +; SSE: # %bb.0: +; SSE-NEXT: movq 16(%rdi), %rcx +; SSE-NEXT: movq (%rdi), %rdx +; SSE-NEXT: movq 8(%rdi), %rsi +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %rsi, %r8 +; SSE-NEXT: addl $64, %r8d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r8d +; SSE-NEXT: rep bsfq %rcx, %r9 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 24(%rdi), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %r9d, %eax +; SSE-NEXT: subl $-128, %eax +; 
SSE-NEXT: orq %rsi, %rdx +; SSE-NEXT: cmovnel %r8d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i256: +; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: movq 8(%rdi), %rdx +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: tzcntq %rdx, %rsi +; AVX2-NEXT: addl $64, %esi +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %esi +; AVX2-NEXT: movq 16(%rdi), %r8 +; AVX2-NEXT: tzcntq %r8, %r9 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 24(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %r9d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i256: +; AVX512: # %bb.0: +; AVX512-NEXT: movq 16(%rdi), %rcx +; AVX512-NEXT: movq (%rdi), %rdx +; AVX512-NEXT: movq 8(%rdi), %rsi +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: tzcntq %rsi, %r8 +; AVX512-NEXT: addl $64, %r8d +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %r8d +; AVX512-NEXT: tzcntq %rcx, %r9 +; AVX512-NEXT: tzcntq 24(%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %r9d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %rsi, %rdx +; AVX512-NEXT: cmovnel %r8d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %a0 = load i256, ptr %p0 + %cnt = call i256 @llvm.cttz.i256(i256 %a0, i1 0) + %res = trunc i256 %cnt to i32 + ret i32 %res +} + +define i32 @test_cttz_i512(i512 %a0) nounwind { +; SSE-LABEL: test_cttz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: rep bsfq %rdi, %rax +; SSE-NEXT: rep bsfq %rsi, %r11 +; SSE-NEXT: addl $64, %r11d +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %rcx, %r10 +; SSE-NEXT: addl $64, %r10d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r10d +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: subl $-128, %r10d +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: orq %rsi, %rax +; SSE-NEXT: cmovnel %r11d, %r10d +; SSE-NEXT: rep bsfq %r8, %rax +; SSE-NEXT: rep bsfq %r9, %r11 +; SSE-NEXT: addl $64, %r11d +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: rep bsfq %rbx, %r14 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %rbx, %rbx +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %r8 +; SSE-NEXT: cmovnel %r11d, %eax +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %rcx, %rsi +; SSE-NEXT: orq %rdx, %rdi +; SSE-NEXT: orq %rsi, %rdi +; SSE-NEXT: cmovnel %r10d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: tzcntq %rdi, %rax +; AVX2-NEXT: tzcntq %rsi, %r11 +; AVX2-NEXT: addl $64, %r11d +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %eax, %r11d +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: tzcntq %rcx, %r10 +; AVX2-NEXT: addl $64, %r10d +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %r10d +; AVX2-NEXT: subl $-128, %r10d +; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: orq %rsi, %rax +; AVX2-NEXT: cmovnel %r11d, %r10d +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 
+; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r8, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %r9, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: tzcntq %r11, %r14 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r11, %r11 +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r9, %r8 +; AVX2-NEXT: cmovnel %ebx, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %rcx, %rsi +; AVX2-NEXT: orq %rdx, %rdi +; AVX2-NEXT: orq %rsi, %rdi +; AVX2-NEXT: cmovnel %r10d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512-NEXT: tzcntq %rdi, %rax +; AVX512-NEXT: tzcntq %rsi, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: tzcntq %rcx, %r10 +; AVX512-NEXT: addl $64, %r10d +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %r10d +; AVX512-NEXT: subl $-128, %r10d +; AVX512-NEXT: movq %rdi, %rax +; AVX512-NEXT: orq %rsi, %rax +; AVX512-NEXT: cmovnel %ebx, %r10d +; AVX512-NEXT: tzcntq %r8, %rax +; AVX512-NEXT: tzcntq %r9, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: tzcntq %r11, %r14 +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r9, %r8 +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %rcx, %rsi +; AVX512-NEXT: orq %rdx, %rdi +; AVX512-NEXT: orq %rsi, %rdi +; AVX512-NEXT: cmovnel %r10d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq + %cnt = call i512 @llvm.cttz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i512(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 48(%rdi), %r10 +; SSE-NEXT: movq 40(%rdi), %r9 +; SSE-NEXT: movq 24(%rdi), %r8 +; SSE-NEXT: movq 16(%rdi), %rdx +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: movq 8(%rdi), %rsi +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: rep bsfq %rsi, %rbx +; SSE-NEXT: addl $64, %ebx +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovnel %eax, %ebx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %r8, %r11 +; SSE-NEXT: addl $64, %r11d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %r11d +; SSE-NEXT: movq 32(%rdi), %r14 +; SSE-NEXT: subl $-128, %r11d +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: orq %rsi, %rax +; SSE-NEXT: cmovnel %ebx, %r11d +; SSE-NEXT: rep bsfq %r14, %rax +; SSE-NEXT: rep bsfq %r9, %rbx +; SSE-NEXT: addl $64, %ebx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovnel %eax, %ebx +; SSE-NEXT: rep bsfq %r10, %r15 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 56(%rdi), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %r14 +; SSE-NEXT: cmovnel %ebx, %eax 
+; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r8, %rsi +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: orq %rsi, %rcx +; SSE-NEXT: cmovnel %r11d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 48(%rdi), %r10 +; AVX2-NEXT: movq 40(%rdi), %r9 +; AVX2-NEXT: movq 24(%rdi), %r8 +; AVX2-NEXT: movq 16(%rdi), %rdx +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: movq 8(%rdi), %rsi +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %rsi, %rbx +; AVX2-NEXT: addl $64, %ebx +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %eax, %ebx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: tzcntq %r8, %r11 +; AVX2-NEXT: addl $64, %r11d +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %r11d +; AVX2-NEXT: subl $-128, %r11d +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: orq %rsi, %rax +; AVX2-NEXT: cmovnel %ebx, %r11d +; AVX2-NEXT: movq 32(%rdi), %rbx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rbx, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: tzcntq %r9, %r14 +; AVX2-NEXT: addl $64, %r14d +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %eax, %r14d +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r10, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 56(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r9, %rbx +; AVX2-NEXT: cmovnel %r14d, %eax +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r8, %rsi +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: orq %rsi, %rcx +; AVX2-NEXT: cmovnel %r11d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 48(%rdi), %r11 +; AVX512-NEXT: movq 40(%rdi), %r9 +; AVX512-NEXT: movq 32(%rdi), %r10 +; AVX512-NEXT: movq 24(%rdi), %r8 +; AVX512-NEXT: movq 16(%rdi), %rdx +; AVX512-NEXT: movq (%rdi), %rcx +; AVX512-NEXT: movq 8(%rdi), %rsi +; AVX512-NEXT: tzcntq %rcx, %rax +; AVX512-NEXT: tzcntq %rsi, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: tzcntq %r8, %rbx +; AVX512-NEXT: addl $64, %ebx +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ebx +; AVX512-NEXT: subl $-128, %ebx +; AVX512-NEXT: movq %rcx, %rax +; AVX512-NEXT: orq %rsi, %rax +; AVX512-NEXT: cmovnel %r14d, %ebx +; AVX512-NEXT: tzcntq %r10, %rax +; AVX512-NEXT: tzcntq %r9, %r14 +; AVX512-NEXT: addl $64, %r14d +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %eax, %r14d +; AVX512-NEXT: tzcntq 56(%rdi), %rax +; AVX512-NEXT: tzcntq %r11, %rdi +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r9, %r10 +; AVX512-NEXT: cmovnel %r14d, %eax +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r8, %rsi +; AVX512-NEXT: orq %rdx, %rcx +; AVX512-NEXT: orq %rsi, %rcx +; AVX512-NEXT: cmovnel %ebx, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; 
AVX512-NEXT: retq + %a0 = load i512, ptr %p0 + %cnt = call i512 @llvm.cttz.i512(i512 %a0, i1 0) + %res = trunc i512 %cnt to i32 + ret i32 %res +} + +define i32 @test_cttz_i1024(i1024 %a0) nounwind { +; SSE-LABEL: test_cttz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq %r9, %r13 +; SSE-NEXT: movq %r8, %r14 +; SSE-NEXT: movq %rcx, %rbx +; SSE-NEXT: movq %rdx, %r10 +; SSE-NEXT: movq %rsi, %r9 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: rep bsfq %rdi, %rax +; SSE-NEXT: rep bsfq %r9, %r15 +; SSE-NEXT: addl $64, %r15d +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: cmovnel %eax, %r15d +; SSE-NEXT: rep bsfq %r10, %r12 +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %r12d, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %rdi, %r12 +; SSE-NEXT: orq %r9, %r12 +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: rep bsfq %r8, %r15 +; SSE-NEXT: movq %r13, %rcx +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: rep bsfq %r13, %r13 +; SSE-NEXT: addl $64, %r13d +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %r15d, %r13d +; SSE-NEXT: rep bsfq %rdx, %r12 +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %r15 +; SSE-NEXT: addl $64, %r15d +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %r12d, %r15d +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; SSE-NEXT: subl $-128, %r15d +; SSE-NEXT: movq %r8, %rbp +; SSE-NEXT: orq %rcx, %rbp +; SSE-NEXT: cmovnel %r13d, %r15d +; SSE-NEXT: addl $256, %r15d # imm = 0x100 +; SSE-NEXT: movq %r9, %r13 +; SSE-NEXT: orq %rbx, %r13 +; SSE-NEXT: movq %rdi, %rbp +; SSE-NEXT: orq %r10, %rbp +; SSE-NEXT: orq %r13, %rbp +; SSE-NEXT: cmovnel %eax, %r15d +; SSE-NEXT: rep bsfq %r11, %r13 +; SSE-NEXT: rep bsfq %r12, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovnel %r13d, %eax +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %r13 +; SSE-NEXT: addl $64, %r13d +; SSE-NEXT: rep bsfq %rsi, %rcx +; SSE-NEXT: testq %rsi, %rsi +; SSE-NEXT: cmovnel %ecx, %r13d +; SSE-NEXT: subl $-128, %r13d +; SSE-NEXT: movq %r11, %rcx +; SSE-NEXT: orq %r12, %rcx +; SSE-NEXT: cmovnel %eax, %r13d +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; SSE-NEXT: rep bsfq %rbp, %rcx +; SSE-NEXT: addl $64, %ecx +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: rep bsfq %r8, %rsi +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %esi, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %rbp, %rdx +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r12 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r12, %r11 +; SSE-NEXT: cmovnel %r13d, %eax +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; SSE-NEXT: orq %rbx, %r9 +; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: orq %r14, %rdi +; SSE-NEXT: orq %r10, %rdi +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r9, %rdi +; SSE-NEXT: cmovnel %r15d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: 
popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: test_cttz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq %r9, %rbx +; AVX2-NEXT: movq %r8, %r14 +; AVX2-NEXT: movq %rcx, %r11 +; AVX2-NEXT: movq %rdx, %r10 +; AVX2-NEXT: movq %rsi, %r9 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX2-NEXT: tzcntq %rdi, %rax +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r9, %r15 +; AVX2-NEXT: addl $64, %r15d +; AVX2-NEXT: testq %rdi, %rdi +; AVX2-NEXT: cmovnel %eax, %r15d +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %r10, %r12 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r11, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: movq %rdi, %r12 +; AVX2-NEXT: orq %r9, %r12 +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r14, %r15 +; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %rbx, %r12 +; AVX2-NEXT: addl $64, %r12d +; AVX2-NEXT: testq %r14, %r14 +; AVX2-NEXT: cmovnel %r15d, %r12d +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %rcx, %r13 +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %rdx, %r15 +; AVX2-NEXT: addl $64, %r15d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %r13d, %r15d +; AVX2-NEXT: subl $-128, %r15d +; AVX2-NEXT: movq %r14, %r13 +; AVX2-NEXT: orq %rbx, %r13 +; AVX2-NEXT: cmovnel %r12d, %r15d +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: addl $256, %r15d # imm = 0x100 +; AVX2-NEXT: movq %r9, %r13 +; AVX2-NEXT: orq %r11, %r13 +; AVX2-NEXT: movq %rdi, %rbp +; AVX2-NEXT: orq %r10, %rbp +; AVX2-NEXT: orq %r13, %rbp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: cmovnel %eax, %r15d +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: tzcntq %r12, %rbp +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r13, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r12, %r12 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: xorl %ebp, %ebp +; AVX2-NEXT: tzcntq %r8, %rbp +; AVX2-NEXT: addl $64, %ebp +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %rsi, %rcx +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %ecx, %ebp +; AVX2-NEXT: subl $-128, %ebp +; AVX2-NEXT: movq %r12, %rcx +; AVX2-NEXT: orq %r13, %rcx +; AVX2-NEXT: cmovnel %eax, %ebp +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %rbx, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX2-NEXT: tzcntq %r8, %rsi +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %rbx, %rdx +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r12 +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r13, %r12 +; AVX2-NEXT: cmovnel %ebp, %eax +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded 
Reload +; AVX2-NEXT: orq %r11, %r9 +; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; AVX2-NEXT: orq %r14, %rdi +; AVX2-NEXT: orq %r10, %rdi +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r9, %rdi +; AVX2-NEXT: cmovnel %r15d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_cttz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq %r9, %r14 +; AVX512-NEXT: movq %r8, %r15 +; AVX512-NEXT: movq %rcx, %r11 +; AVX512-NEXT: movq %rdx, %r10 +; AVX512-NEXT: movq %rsi, %r9 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; AVX512-NEXT: tzcntq %rdi, %rax +; AVX512-NEXT: tzcntq %r9, %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %rdi, %rdi +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: tzcntq %rdx, %r13 +; AVX512-NEXT: tzcntq %r11, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %r13d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: movq %rdi, %r13 +; AVX512-NEXT: orq %r9, %r13 +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: tzcntq %r8, %r12 +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: tzcntq %r14, %r13 +; AVX512-NEXT: addl $64, %r13d +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %r12d, %r13d +; AVX512-NEXT: tzcntq %rcx, %rbp +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %ebp, %r12d +; AVX512-NEXT: subl $-128, %r12d +; AVX512-NEXT: movq %r8, %rbp +; AVX512-NEXT: orq %r14, %rbp +; AVX512-NEXT: cmovnel %r13d, %r12d +; AVX512-NEXT: addl $256, %r12d # imm = 0x100 +; AVX512-NEXT: movq %r9, %r13 +; AVX512-NEXT: orq %r11, %r13 +; AVX512-NEXT: movq %rdi, %rbp +; AVX512-NEXT: orq %rdx, %rbp +; AVX512-NEXT: orq %r13, %rbp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: tzcntq %rbx, %rbp +; AVX512-NEXT: tzcntq %r13, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: tzcntq %rsi, %rcx +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %ecx, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %rbx, %rcx +; AVX512-NEXT: orq %r13, %rcx +; AVX512-NEXT: cmovnel %eax, %ebp +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; AVX512-NEXT: tzcntq %r14, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; AVX512-NEXT: tzcntq %r8, %rsi +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %esi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r14, %rdx +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r13 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r13, %rbx +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r11 
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX512-NEXT: orq %r11, %r9 +; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; AVX512-NEXT: orq %r15, %rdi +; AVX512-NEXT: orq %r10, %rdi +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r9, %rdi +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %cnt = call i1024 @llvm.cttz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} + +define i32 @load_cttz_i1024(ptr %p0) nounwind { +; SSE-LABEL: load_cttz_i1024: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movq 88(%rdi), %r10 +; SSE-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 56(%rdi), %rcx +; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 40(%rdi), %rsi +; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movq 24(%rdi), %r9 +; SSE-NEXT: movq 16(%rdi), %r15 +; SSE-NEXT: movq (%rdi), %r8 +; SSE-NEXT: movq 8(%rdi), %r11 +; SSE-NEXT: rep bsfq %r8, %rax +; SSE-NEXT: rep bsfq %r11, %rdx +; SSE-NEXT: addl $64, %edx +; SSE-NEXT: testq %r8, %r8 +; SSE-NEXT: cmovnel %eax, %edx +; SSE-NEXT: rep bsfq %r15, %rbx +; SSE-NEXT: rep bsfq %r9, %rax +; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r15, %r15 +; SSE-NEXT: cmovnel %ebx, %eax +; SSE-NEXT: movq 32(%rdi), %rbx +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: movq %r8, %r14 +; SSE-NEXT: orq %r11, %r14 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: rep bsfq %rbx, %rdx +; SSE-NEXT: rep bsfq %rsi, %r12 +; SSE-NEXT: addl $64, %r12d +; SSE-NEXT: testq %rbx, %rbx +; SSE-NEXT: cmovnel %edx, %r12d +; SSE-NEXT: movq 48(%rdi), %r13 +; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: rep bsfq %r13, %rdx +; SSE-NEXT: rep bsfq %rcx, %r14 +; SSE-NEXT: addl $64, %r14d +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %edx, %r14d +; SSE-NEXT: subl $-128, %r14d +; SSE-NEXT: movq %rbx, %rdx +; SSE-NEXT: orq %rsi, %rdx +; SSE-NEXT: cmovnel %r12d, %r14d +; SSE-NEXT: movq 72(%rdi), %r12 +; SSE-NEXT: addl $256, %r14d # imm = 0x100 +; SSE-NEXT: movq %r11, %rdx +; SSE-NEXT: orq %r9, %rdx +; SSE-NEXT: movq %r8, %r13 +; SSE-NEXT: orq %r15, %r13 +; SSE-NEXT: orq %rdx, %r13 +; SSE-NEXT: movq 64(%rdi), %r13 +; SSE-NEXT: cmovnel %eax, %r14d +; SSE-NEXT: rep bsfq %r13, %rdx +; SSE-NEXT: rep bsfq %r12, %rax +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: testq %r13, %r13 +; SSE-NEXT: cmovnel %edx, %eax +; SSE-NEXT: rep bsfq %r10, %rbp +; SSE-NEXT: addl $64, %ebp +; SSE-NEXT: movq 80(%rdi), %r10 +; SSE-NEXT: rep bsfq %r10, %rcx +; SSE-NEXT: testq %r10, %r10 +; SSE-NEXT: cmovnel %ecx, %ebp +; SSE-NEXT: subl $-128, %ebp +; SSE-NEXT: movq %r13, %rcx +; SSE-NEXT: orq %r12, %rcx +; SSE-NEXT: cmovnel %eax, %ebp +; SSE-NEXT: movq 104(%rdi), %r9 +; SSE-NEXT: rep bsfq %r9, %rcx +; SSE-NEXT: addl $64, %ecx +; SSE-NEXT: movq 96(%rdi), %rdx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovnel %eax, %ecx +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq 120(%rdi), %rax +; SSE-NEXT: movq 112(%rdi), %rdi +; SSE-NEXT: addl $64, %eax +; SSE-NEXT: rep bsfq %rdi, %rsi +; SSE-NEXT: testq %rdi, %rdi +; SSE-NEXT: 
cmovnel %esi, %eax +; SSE-NEXT: subl $-128, %eax +; SSE-NEXT: orq %r9, %rdx +; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; SSE-NEXT: orq %r10, %r13 +; SSE-NEXT: addl $256, %eax # imm = 0x100 +; SSE-NEXT: orq %r12, %r13 +; SSE-NEXT: cmovnel %ebp, %eax +; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; SSE-NEXT: orq %rcx, %r11 +; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; SSE-NEXT: orq %rbx, %r8 +; SSE-NEXT: orq %r15, %r8 +; SSE-NEXT: addl $512, %eax # imm = 0x200 +; SSE-NEXT: orq %r11, %r8 +; SSE-NEXT: cmovnel %r14d, %eax +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; +; AVX2-LABEL: load_cttz_i1024: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 72(%rdi), %r14 +; AVX2-NEXT: movq 64(%rdi), %r15 +; AVX2-NEXT: movq 56(%rdi), %r9 +; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 48(%rdi), %rcx +; AVX2-NEXT: movq 40(%rdi), %r10 +; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: movq 32(%rdi), %rsi +; AVX2-NEXT: movq 24(%rdi), %rbp +; AVX2-NEXT: movq 16(%rdi), %rbx +; AVX2-NEXT: movq (%rdi), %r8 +; AVX2-NEXT: movq 8(%rdi), %r11 +; AVX2-NEXT: tzcntq %r8, %rax +; AVX2-NEXT: tzcntq %r11, %rdx +; AVX2-NEXT: addl $64, %edx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovnel %eax, %edx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %rbx, %r12 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rbp, %rax +; AVX2-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %rbx, %rbx +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: movq %r8, %r12 +; AVX2-NEXT: orq %r11, %r12 +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: tzcntq %rsi, %rdx +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %r10, %r13 +; AVX2-NEXT: addl $64, %r13d +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: cmovnel %edx, %r13d +; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: tzcntq %rcx, %rdx +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %r9, %r12 +; AVX2-NEXT: addl $64, %r12d +; AVX2-NEXT: testq %rcx, %rcx +; AVX2-NEXT: cmovnel %edx, %r12d +; AVX2-NEXT: subl $-128, %r12d +; AVX2-NEXT: movq %rsi, %rdx +; AVX2-NEXT: orq %r10, %rdx +; AVX2-NEXT: cmovnel %r13d, %r12d +; AVX2-NEXT: addl $256, %r12d # imm = 0x100 +; AVX2-NEXT: movq %r11, %rdx +; AVX2-NEXT: orq %rbp, %rdx +; AVX2-NEXT: movq %r8, %r13 +; AVX2-NEXT: orq %rbx, %r13 +; AVX2-NEXT: orq %rdx, %r13 +; AVX2-NEXT: cmovnel %eax, %r12d +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: tzcntq %r15, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r14, %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: testq %r15, %r15 +; AVX2-NEXT: cmovnel %edx, %eax +; AVX2-NEXT: movq 88(%rdi), %rbp +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %rbp, %r13 +; AVX2-NEXT: addl $64, %r13d +; AVX2-NEXT: movq 80(%rdi), %r10 +; AVX2-NEXT: xorl %ecx, %ecx +; 
AVX2-NEXT: tzcntq %r10, %rcx +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovnel %ecx, %r13d +; AVX2-NEXT: subl $-128, %r13d +; AVX2-NEXT: movq %r15, %rcx +; AVX2-NEXT: orq %r14, %rcx +; AVX2-NEXT: cmovnel %eax, %r13d +; AVX2-NEXT: movq 104(%rdi), %r9 +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: tzcntq %r9, %rcx +; AVX2-NEXT: addl $64, %ecx +; AVX2-NEXT: movq 96(%rdi), %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovnel %eax, %ecx +; AVX2-NEXT: movq 112(%rdi), %rsi +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 120(%rdi), %rax +; AVX2-NEXT: addl $64, %eax +; AVX2-NEXT: tzcntq %rsi, %rdi +; AVX2-NEXT: testq %rsi, %rsi +; AVX2-NEXT: cmovnel %edi, %eax +; AVX2-NEXT: subl $-128, %eax +; AVX2-NEXT: orq %r9, %rdx +; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: orq %rbp, %r14 +; AVX2-NEXT: orq %r10, %r15 +; AVX2-NEXT: addl $256, %eax # imm = 0x100 +; AVX2-NEXT: orq %r14, %r15 +; AVX2-NEXT: cmovnel %r13d, %eax +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX2-NEXT: orq %rcx, %r11 +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX2-NEXT: orq %rbx, %r8 +; AVX2-NEXT: addl $512, %eax # imm = 0x200 +; AVX2-NEXT: orq %r11, %r8 +; AVX2-NEXT: cmovnel %r12d, %eax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_cttz_i1024: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: movq 88(%rdi), %rbp +; AVX512-NEXT: movq 72(%rdi), %r15 +; AVX512-NEXT: movq 56(%rdi), %r9 +; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 48(%rdi), %rcx +; AVX512-NEXT: movq 40(%rdi), %r10 +; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq 32(%rdi), %rsi +; AVX512-NEXT: movq 24(%rdi), %r14 +; AVX512-NEXT: movq 16(%rdi), %rbx +; AVX512-NEXT: movq (%rdi), %r8 +; AVX512-NEXT: movq 8(%rdi), %r11 +; AVX512-NEXT: tzcntq %r8, %rax +; AVX512-NEXT: tzcntq %r11, %rdx +; AVX512-NEXT: addl $64, %edx +; AVX512-NEXT: testq %r8, %r8 +; AVX512-NEXT: cmovnel %eax, %edx +; AVX512-NEXT: tzcntq %rbx, %r12 +; AVX512-NEXT: tzcntq %r14, %rax +; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %rbx, %rbx +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: movq %r8, %r12 +; AVX512-NEXT: orq %r11, %r12 +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: tzcntq %rsi, %rdx +; AVX512-NEXT: tzcntq %r10, %r13 +; AVX512-NEXT: addl $64, %r13d +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: cmovnel %edx, %r13d +; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: tzcntq %rcx, %rdx +; AVX512-NEXT: tzcntq %r9, %r12 +; AVX512-NEXT: addl $64, %r12d +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: cmovnel %edx, %r12d +; AVX512-NEXT: subl $-128, %r12d +; AVX512-NEXT: movq %rsi, %rdx +; AVX512-NEXT: orq %r10, %rdx +; AVX512-NEXT: cmovnel 
%r13d, %r12d +; AVX512-NEXT: addl $256, %r12d # imm = 0x100 +; AVX512-NEXT: movq %r11, %rdx +; AVX512-NEXT: orq %r14, %rdx +; AVX512-NEXT: movq %r8, %r13 +; AVX512-NEXT: orq %rbx, %r13 +; AVX512-NEXT: orq %rdx, %r13 +; AVX512-NEXT: movq 64(%rdi), %r13 +; AVX512-NEXT: cmovnel %eax, %r12d +; AVX512-NEXT: tzcntq %r13, %rdx +; AVX512-NEXT: tzcntq %r15, %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: testq %r13, %r13 +; AVX512-NEXT: cmovnel %edx, %eax +; AVX512-NEXT: movq %rbp, %r14 +; AVX512-NEXT: tzcntq %rbp, %rbp +; AVX512-NEXT: addl $64, %ebp +; AVX512-NEXT: movq 80(%rdi), %r10 +; AVX512-NEXT: tzcntq %r10, %rcx +; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: cmovnel %ecx, %ebp +; AVX512-NEXT: subl $-128, %ebp +; AVX512-NEXT: movq %r13, %rcx +; AVX512-NEXT: orq %r15, %rcx +; AVX512-NEXT: cmovnel %eax, %ebp +; AVX512-NEXT: movq 104(%rdi), %r9 +; AVX512-NEXT: tzcntq %r9, %rcx +; AVX512-NEXT: addl $64, %ecx +; AVX512-NEXT: movq 96(%rdi), %rdx +; AVX512-NEXT: tzcntq %rdx, %rax +; AVX512-NEXT: testq %rdx, %rdx +; AVX512-NEXT: cmovnel %eax, %ecx +; AVX512-NEXT: movq 112(%rdi), %rsi +; AVX512-NEXT: tzcntq 120(%rdi), %rax +; AVX512-NEXT: addl $64, %eax +; AVX512-NEXT: tzcntq %rsi, %rdi +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: cmovnel %edi, %eax +; AVX512-NEXT: subl $-128, %eax +; AVX512-NEXT: orq %r9, %rdx +; AVX512-NEXT: cmovnel %ecx, %eax +; AVX512-NEXT: orq %r14, %r15 +; AVX512-NEXT: orq %r10, %r13 +; AVX512-NEXT: addl $256, %eax # imm = 0x100 +; AVX512-NEXT: orq %r15, %r13 +; AVX512-NEXT: cmovnel %ebp, %eax +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX512-NEXT: orq %rcx, %r11 +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX512-NEXT: orq %rbx, %r8 +; AVX512-NEXT: addl $512, %eax # imm = 0x200 +; AVX512-NEXT: orq %r11, %r8 +; AVX512-NEXT: cmovnel %r12d, %eax +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq + %a0 = load i1024, ptr %p0 + %cnt = call i1024 @llvm.cttz.i1024(i1024 %a0, i1 0) + %res = trunc i1024 %cnt to i32 + ret i32 %res +} diff --git a/llvm/test/Instrumentation/AllocToken/intrinsic.ll b/llvm/test/Instrumentation/AllocToken/intrinsic.ll new file mode 100644 index 0000000..13aaa90 --- /dev/null +++ b/llvm/test/Instrumentation/AllocToken/intrinsic.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; Test that the alloc-token pass lowers the intrinsic to a constant token ID. 
+; +; RUN: opt < %s -passes=alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i64 @llvm.alloc.token.id.i64(metadata) + +define i64 @test_intrinsic_lowering() { +; CHECK-LABEL: define i64 @test_intrinsic_lowering() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i64 0 +; +entry: + %token_no_ptr = call i64 @llvm.alloc.token.id.i64(metadata !0) + ret i64 %token_no_ptr +} + +define i64 @test_intrinsic_lowering_ptr() { +; CHECK-LABEL: define i64 @test_intrinsic_lowering_ptr() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i64 1 +; +entry: + %token_with_ptr = call i64 @llvm.alloc.token.id.i64(metadata !1) + ret i64 %token_with_ptr +} + +!0 = !{!"NoPointerType", i1 false} +!1 = !{!"PointerType", i1 true} diff --git a/llvm/test/Instrumentation/AllocToken/intrinsic32.ll b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll new file mode 100644 index 0000000..eb5dbbe --- /dev/null +++ b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; Test that the alloc-token pass lowers the intrinsic to a constant token ID. +; +; RUN: opt < %s -passes=alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-pc-linux-gnu" + +declare i32 @llvm.alloc.token.id.i32(metadata) + +define i32 @test_intrinsic_lowering() { +; CHECK-LABEL: define i32 @test_intrinsic_lowering() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 0 +; +entry: + %token_no_ptr = call i32 @llvm.alloc.token.id.i32(metadata !0) + ret i32 %token_no_ptr +} + +define i32 @test_intrinsic_lowering_ptr() { +; CHECK-LABEL: define i32 @test_intrinsic_lowering_ptr() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 1 +; +entry: + %token_with_ptr = call i32 @llvm.alloc.token.id.i32(metadata !1) + ret i32 %token_with_ptr +} + +!0 = !{!"NoPointerType", i1 false} +!1 = !{!"PointerType", i1 true} diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index d3b44eb..8160544 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp // GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, s4, v7, v8 -// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, v4, 0, 1 -// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] v_add_min_i32 v2, v4, 3, s2 -// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] v_add_min_i32 v2, s4, 4, v2 -// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: 
[0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] v_add_min_i32 v2, v4, v7, 12345 -// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04] v_add_max_i32 v2, s4, v7, v8 -// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] v_add_max_i32 v2, v4, 0, 1 -// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] v_add_max_i32 v2, v4, 3, s2 -// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] v_add_max_i32 v2, s4, 4, v2 -// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] v_add_max_i32 v2, v4, v7, 12345 -// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04] v_add_min_u32 v2, s4, v7, v8 -// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] v_add_min_u32 v2, v4, 0, 1 -// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] v_add_min_u32 v2, v4, 3, s2 -// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] v_add_min_u32 v2, s4, 4, v2 -// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] v_add_min_u32 v2, v4, v7, 12345 -// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04] v_add_max_u32 v2, s4, v7, v8 -// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] v_add_max_u32 v2, v4, 0, 1 -// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] v_add_max_u32 v2, v4, 3, s2 -// GFX1250: 
v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] v_add_max_u32 v2, s4, 4, v2 -// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] v_add_max_u32 v2, v4, v7, 12345 -// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] v_cvt_pk_bf16_f32 v5, v1, v2 // GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index 98d07ac..d913bd2 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp // GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, s4, v7, v8 -// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] v_add_min_i32 v2, v4, 0, 1 -// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] v_add_min_i32 v2, v4, 3, s2 -// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] v_add_min_i32 v2, s4, 4, v2 -// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] v_add_min_i32 v2, v4, v7, 12345 -// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04] v_add_max_i32 v2, s4, v7, v8 -// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] v_add_max_i32 v2, v4, 0, 1 -// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] v_add_max_i32 v2, v4, 3, s2 -// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] v_add_max_i32 v2, s4, 4, v2 -// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] v_add_max_i32 v2, v4, v7, 
12345 -// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_i32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04] v_add_min_u32 v2, s4, v7, v8 -// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] v_add_min_u32 v2, v4, 0, 1 -// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] v_add_min_u32 v2, v4, 3, s2 -// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] v_add_min_u32 v2, s4, 4, v2 -// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] v_add_min_u32 v2, v4, v7, 12345 -// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_min_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04] v_add_max_u32 v2, s4, v7, v8 -// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] +// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] v_add_max_u32 v2, v4, 0, 1 -// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] +// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] v_add_max_u32 v2, v4, 3, s2 -// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] +// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] v_add_max_u32 v2, s4, 4, v2 -// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] +// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] v_add_max_u32 v2, v4, v7, 12345 -// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_add_max_u32 v0, v1, v2, v3 clamp +// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] v_cvt_pk_bf16_f32 v5, v1, v2 // GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt index 29bfa54..7af0bfe5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt @@ -237,64 +237,76 @@ # GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
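+# A note on the case format below: each pair feeds raw VOP3 encoding bytes to +# the disassembler and FileChecks the printed instruction. Comparing the clamp +# variants against their plain counterparts shows the clamp modifier as the +# 0x80 bit of the second encoding byte.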
0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04] + +0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04] 0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] 0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding:
[0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04] 0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04 -# GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] +# GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04] 0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04 -# GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] +# GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04] 0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02 -# GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] +# GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02] 0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00 -# GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] +# GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00] 0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 -# GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] +# GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04 +# GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04] 0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf # GFX1250: v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/Transforms/InstCombine/select-and-or.ll b/llvm/test/Transforms/InstCombine/select-and-or.ll index 453ca66..0b8eda4 100644 --- a/llvm/test/Transforms/InstCombine/select-and-or.ll +++ b/llvm/test/Transforms/InstCombine/select-and-or.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes=instcombine < %s | FileCheck %s declare void @use(i1) @@ -6,6 +6,10 @@ declare i1 @gen_i1() declare <2 x i1> @gen_v2i1() ; Should not be converted to "and", which has different poison semantics. +;. +; CHECK: @g1 = external global i16 +; CHECK: @g2 = external global i16 +;. 
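+; The ;. markers fence CHECK blocks emitted by update_test_checks.py under +; --check-globals: the module-level globals @g1 and @g2 above, plus the +; attribute and metadata nodes (including the swapped branch_weights) checked +; at the end of this file.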
define i1 @logical_and(i1 %a, i1 %b) { ; CHECK-LABEL: @logical_and( ; CHECK-NEXT: [[RES:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false @@ -225,29 +229,29 @@ define i1 @not_not_true(i1 %x, i1 %y) { ; (!x && !y) --> !(x || y) -define i1 @not_not_false(i1 %x, i1 %y) { +define i1 @not_not_false(i1 %x, i1 %y) !prof !0 { ; CHECK-LABEL: @not_not_false( -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 true, i1 [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 true, i1 [[Y:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[R:%.*]] = xor i1 [[TMP1]], true ; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i1 %x, true %noty = xor i1 %y, true - %r = select i1 %notx, i1 %noty, i1 false + %r = select i1 %notx, i1 %noty, i1 false, !prof !1 ret i1 %r } ; (!x || !y) --> !(x && y) -define i1 @not_true_not(i1 %x, i1 %y) { +define i1 @not_true_not(i1 %x, i1 %y) !prof !0 { ; CHECK-LABEL: @not_true_not( -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 [[Y:%.*]], i1 false +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 [[Y:%.*]], i1 false, !prof [[PROF1]] ; CHECK-NEXT: [[R:%.*]] = xor i1 [[TMP1]], true ; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i1 %x, true %noty = xor i1 %y, true - %r = select i1 %notx, i1 true, i1 %noty + %r = select i1 %notx, i1 true, i1 %noty, !prof !1 ret i1 %r } @@ -1348,3 +1352,12 @@ define i8 @test_logical_commuted_and_ne_a_b(i1 %other_cond, i8 %a, i8 %b) { %select = select i1 %or.cond, i8 %a, i8 %b ret i8 %select } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 2} +;. diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index d88eaf8..3d97048 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -58,15 +58,15 @@ define i1 @cond_eq_or_const(i8 %X, i8 %Y) !prof !0 { ret i1 %res } -define i1 @xor_and(i1 %c, i32 %X, i32 %Y) { +define i1 @xor_and(i1 %c, i32 %X, i32 %Y) !prof !0 { ; CHECK-LABEL: @xor_and( ; CHECK-NEXT: [[COMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 true, i1 [[COMP]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 true, i1 [[COMP]], !prof [[PROF2:![0-9]+]] ; CHECK-NEXT: ret i1 [[SEL]] ; %comp = icmp ult i32 %X, %Y - %sel = select i1 %c, i1 %comp, i1 false + %sel = select i1 %c, i1 %comp, i1 false, !prof !1 %res = xor i1 %sel, true ret i1 %res } @@ -97,15 +97,15 @@ define <2 x i1> @xor_and3(<2 x i1> %c, <2 x i32> %X, <2 x i32> %Y) { ret <2 x i1> %res } -define i1 @xor_or(i1 %c, i32 %X, i32 %Y) { +define i1 @xor_or(i1 %c, i32 %X, i32 %Y) !prof !0 { ; CHECK-LABEL: @xor_or( ; CHECK-NEXT: [[COMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 [[COMP]], i1 false +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 [[COMP]], i1 false, !prof [[PROF2]] ; CHECK-NEXT: ret i1 [[SEL]] ; %comp = icmp ult i32 %X, %Y - %sel = select i1 %c, i1 true, i1 %comp + %sel = select i1 %c, i1 true, i1 %comp, !prof !1 %res = xor i1 %sel, true ret i1 %res } @@ -802,4 +802,5 @@ define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) { ;. 
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 2} ;. diff --git a/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll new file mode 100644 index 0000000..7816781 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes=instcombine | FileCheck %s +@A = extern_weak global float, align 4 + +; %same.as.v1 is a select with two phis %v1 and %phi.to.remove as the true +; and false values; %v1 and %phi.to.remove always hold the same value. +; Fold the select instruction %same.as.v1 to %v1. +define void @select_with_identical_phi(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %sub, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit:
%vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; %phi.to.remove.next and %v1.1 are swapped. +; Check that %same.as.v1 can be folded. +define void @select_with_identical_phi_2(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %v1, float %sub + %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; same.as.v1 are swapped. +; Check that %same.as.v1 can be folded. 
+define void @select_with_identical_phi_3(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %sub, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; %same.as.v1, %phi.to.remove.next and %v1.1 are swapped. +; Check that %same.as.v1 can be folded. 
+define void @select_with_identical_phi_4(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %v1, float %sub + %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 8b03db3..152f7db 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -172,6 +172,11 @@ static cl::opt<bool> cl::desc("Print MIR2Vec vocabulary contents"), cl::init(false)); +static cl::opt<bool> + PrintMIR2Vec("print-mir2vec", cl::Hidden, + cl::desc("Print MIR2Vec embeddings for functions"), + cl::init(false)); + static cl::list<std::string> IncludeDirs("I", cl::desc("include search path")); static cl::opt<bool> RemarksWithHotness( @@ -775,6 +780,11 @@ static int compileModule(char **argv, LLVMContext &Context) { PM.add(createMIR2VecVocabPrinterLegacyPass(errs())); } + // Add MIR2Vec printer if requested + if (PrintMIR2Vec) { + 
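+      // Assumed behavior, mirroring the vocabulary printer above: the legacy +      // pass prints each machine function's MIR2Vec embedding to errs(), +      // keeping it separate from the compiled output stream.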
PM.add(createMIR2VecPrinterLegacyPass(errs())); + } + PM.add(createFreeMachineFunctionPass()); } else { if (Target->addPassesToEmitFile(PM, *OS, DwoOut ? &DwoOut->os() : nullptr, @@ -788,6 +798,11 @@ static int compileModule(char **argv, LLVMContext &Context) { if (PrintMIR2VecVocab) { PM.add(createMIR2VecVocabPrinterLegacyPass(errs())); } + + // Add MIR2Vec printer if requested + if (PrintMIR2Vec) { + PM.add(createMIR2VecPrinterLegacyPass(errs())); + } } Target->getObjFileLowering()->Initialize(MMIWP->getMMI().getContext(), diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index 79f0527..11eb58e 100644 --- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -202,9 +202,10 @@ static alias IgnoreCaseAlias("i", desc("Alias for --ignore-case."), aliasopt(IgnoreCase), cl::NotHidden); static list<std::string> Name( "name", - desc("Find and print all debug info entries whose name (DW_AT_name " - "attribute) matches the exact text in <pattern>. When used with the " - "the -regex option <pattern> is interpreted as a regular expression."), + desc("Find and print all debug info entries whose name " + "(DW_AT_name/DW_AT_linkage_name attribute) matches the exact text " + "in <pattern>. When used with the -regex option <pattern> is " + "interpreted as a regular expression."), value_desc("pattern"), cat(DwarfDumpCategory)); static alias NameAlias("n", desc("Alias for --name"), aliasopt(Name), cl::NotHidden); diff --git a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp index 58f5dcc6..3c2e963 100644 --- a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp +++ b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp @@ -283,7 +283,7 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInPolicyConflict) { OnDiskGraphDB::FaultInPolicy::SingleNode); } -#if defined(EXPENSIVE_CHECKS) +#if defined(EXPENSIVE_CHECKS) && !defined(_WIN32) TEST_F(OnDiskCASTest, OnDiskGraphDBSpaceLimit) { setMaxOnDiskCASMappingSize(); unittest::TempDir Temp("ondiskcas", /*Unique=*/true); diff --git a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp index 89c03b8..19ea8f5 100644 --- a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp +++ b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp @@ -33,13 +33,13 @@ TEST_F(OnDiskCASTest, OnDiskKeyValueDBTest) { } ValueType ValW = valueFromString("world"); - ArrayRef<char> Val; + std::optional<ArrayRef<char>> Val; ASSERT_THAT_ERROR(DB->put(digest("hello"), ValW).moveInto(Val), Succeeded()); - EXPECT_EQ(Val, ArrayRef(ValW)); + EXPECT_EQ(*Val, ArrayRef(ValW)); ASSERT_THAT_ERROR( DB->put(digest("hello"), valueFromString("other")).moveInto(Val), Succeeded()); - EXPECT_EQ(Val, ArrayRef(ValW)); + EXPECT_EQ(*Val, ArrayRef(ValW)); { std::optional<ArrayRef<char>> Val; diff --git a/llvm/unittests/CodeGen/MIR2VecTest.cpp b/llvm/unittests/CodeGen/MIR2VecTest.cpp index 11222b4..8710d6b 100644 --- a/llvm/unittests/CodeGen/MIR2VecTest.cpp +++ b/llvm/unittests/CodeGen/MIR2VecTest.cpp @@ -82,6 +82,9 @@ protected: return; } + // Set the data layout to match the target machine + M->setDataLayout(TM->createDataLayout()); + // Create a dummy function to get subtarget info FunctionType *FT = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *F = Function::Create(FT, Function::ExternalLinkage, "test", M.get()); @@ -96,16 +99,27 @@ protected: } void TearDown() override { TII = nullptr; } -}; -// Function to find an opcode by name -static int findOpcodeByName(const TargetInstrInfo *TII, StringRef Name) { - for (unsigned Opcode = 1; Opcode
< TII->getNumOpcodes(); ++Opcode) { - if (TII->getName(Opcode) == Name) - return Opcode; + // Find an opcode by name + int findOpcodeByName(StringRef Name) { + for (unsigned Opcode = 1; Opcode < TII->getNumOpcodes(); ++Opcode) { + if (TII->getName(Opcode) == Name) + return Opcode; + } + return -1; // Not found } - return -1; // Not found -} + + // Create a vocabulary with specific opcodes and embeddings + Expected<MIRVocabulary> + createTestVocab(std::initializer_list<std::pair<const char *, float>> opcodes, + unsigned dimension = 2) { + assert(TII && "TargetInstrInfo not initialized"); + VocabMap VMap; + for (const auto &[name, value] : opcodes) + VMap[name] = Embedding(dimension, value); + return MIRVocabulary::create(std::move(VMap), *TII); + } +}; TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) { // Test that same base opcodes get same canonical indices @@ -118,10 +132,8 @@ TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) { // Create a MIRVocabulary instance to test the mapping // Use a minimal MIRVocabulary to trigger canonical mapping construction - VocabMap VMap; Embedding Val = Embedding(64, 1.0f); - VMap["ADD"] = Val; - auto TestVocabOrErr = MIRVocabulary::create(std::move(VMap), *TII); + auto TestVocabOrErr = createTestVocab({{"ADD", 1.0f}}, 64); ASSERT_TRUE(static_cast<bool>(TestVocabOrErr)) << "Failed to create vocabulary: " << toString(TestVocabOrErr.takeError()); @@ -156,16 +168,16 @@ TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) { 6880u); // X86 has >6880 unique base opcodes // Check that the embeddings for opcodes not in the vocab are zero vectors - int Add32rrOpcode = findOpcodeByName(TII, "ADD32rr"); + int Add32rrOpcode = findOpcodeByName("ADD32rr"); ASSERT_NE(Add32rrOpcode, -1) << "ADD32rr opcode not found"; EXPECT_TRUE(TestVocab[Add32rrOpcode].approximatelyEquals(Val)); - int Sub32rrOpcode = findOpcodeByName(TII, "SUB32rr"); + int Sub32rrOpcode = findOpcodeByName("SUB32rr"); ASSERT_NE(Sub32rrOpcode, -1) << "SUB32rr opcode not found"; EXPECT_TRUE( TestVocab[Sub32rrOpcode].approximatelyEquals(Embedding(64, 0.0f))); - int Mov32rrOpcode = findOpcodeByName(TII, "MOV32rr"); + int Mov32rrOpcode = findOpcodeByName("MOV32rr"); ASSERT_NE(Mov32rrOpcode, -1) << "MOV32rr opcode not found"; EXPECT_TRUE( TestVocab[Mov32rrOpcode].approximatelyEquals(Embedding(64, 0.0f))); @@ -178,9 +190,7 @@ TEST_F(MIR2VecVocabTestFixture, DeterministicMapping) { // Create a MIRVocabulary instance to test deterministic mapping // Use a minimal MIRVocabulary to trigger canonical mapping construction - VocabMap VMap; - VMap["ADD"] = Embedding(64, 1.0f); - auto TestVocabOrErr = MIRVocabulary::create(std::move(VMap), *TII); + auto TestVocabOrErr = createTestVocab({{"ADD", 1.0f}}, 64); ASSERT_TRUE(static_cast<bool>(TestVocabOrErr)) << "Failed to create vocabulary: " << toString(TestVocabOrErr.takeError()); @@ -189,8 +199,6 @@ TEST_F(MIR2VecVocabTestFixture, DeterministicMapping) { unsigned Index1 = TestVocab.getCanonicalIndexForBaseName(BaseName); unsigned Index2 = TestVocab.getCanonicalIndexForBaseName(BaseName); unsigned Index3 = TestVocab.getCanonicalIndexForBaseName(BaseName); - - EXPECT_EQ(Index1, Index2); EXPECT_EQ(Index2, Index3); // Test across multiple runs @@ -202,11 +210,7 @@ TEST_F(MIR2VecVocabTestFixture, DeterministicMapping) { // Test MIRVocabulary construction TEST_F(MIR2VecVocabTestFixture, VocabularyConstruction) { - VocabMap VMap; - VMap["ADD"] = Embedding(128, 1.0f); // Dimension 128, all values 1.0 - VMap["SUB"] = Embedding(128, 2.0f); // Dimension 
128, all values 2.0 - - auto VocabOrErr = MIRVocabulary::create(std::move(VMap), *TII); + auto VocabOrErr = createTestVocab({{"ADD", 1.0f}, {"SUB", 2.0f}}, 128); ASSERT_TRUE(static_cast<bool>(VocabOrErr)) << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); auto &Vocab = *VocabOrErr; @@ -243,4 +247,251 @@ TEST_F(MIR2VecVocabTestFixture, EmptyVocabularyCreation) { } } +// Fixture for embedding related tests +class MIR2VecEmbeddingTestFixture : public MIR2VecVocabTestFixture { +protected: + std::unique_ptr<MachineModuleInfo> MMI; + MachineFunction *MF = nullptr; + + void SetUp() override { + MIR2VecVocabTestFixture::SetUp(); + // If base class setup was skipped (TII not initialized), skip derived setup + if (!TII) + GTEST_SKIP() << "Failed to get target instruction info in " + "the base class setup; Skipping test"; + + // Create a dummy function for MachineFunction + FunctionType *FT = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = + Function::Create(FT, Function::ExternalLinkage, "test", M.get()); + + MMI = std::make_unique<MachineModuleInfo>(TM.get()); + MF = &MMI->getOrCreateMachineFunction(*F); + } + + void TearDown() override { MIR2VecVocabTestFixture::TearDown(); } + + // Create a machine instruction + MachineInstr *createMachineInstr(MachineBasicBlock &MBB, unsigned Opcode) { + const MCInstrDesc &Desc = TII->get(Opcode); + // Create instruction - operands don't affect opcode-based embeddings + MachineInstr *MI = BuildMI(MBB, MBB.end(), DebugLoc(), Desc); + return MI; + } + + MachineInstr *createMachineInstr(MachineBasicBlock &MBB, + const char *OpcodeName) { + int Opcode = findOpcodeByName(OpcodeName); + if (Opcode == -1) + return nullptr; + return createMachineInstr(MBB, Opcode); + } + + void createMachineInstrs(MachineBasicBlock &MBB, + std::initializer_list<const char *> Opcodes) { + for (const char *OpcodeName : Opcodes) { + MachineInstr *MI = createMachineInstr(MBB, OpcodeName); + ASSERT_TRUE(MI != nullptr); + } + } +}; + +// Test factory method for creating embedder +TEST_F(MIR2VecEmbeddingTestFixture, CreateSymbolicEmbedder) { + auto VocabOrErr = MIRVocabulary::createDummyVocabForTest(*TII, 1); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &V = *VocabOrErr; + auto Emb = MIREmbedder::create(MIR2VecKind::Symbolic, *MF, V); + EXPECT_NE(Emb, nullptr); +} + +TEST_F(MIR2VecEmbeddingTestFixture, CreateInvalidMode) { + auto VocabOrErr = MIRVocabulary::createDummyVocabForTest(*TII, 1); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &V = *VocabOrErr; + auto Result = MIREmbedder::create(static_cast<MIR2VecKind>(-1), *MF, V); + EXPECT_FALSE(static_cast<bool>(Result)); +} + +// Test SymbolicMIREmbedder with simple target opcodes +TEST_F(MIR2VecEmbeddingTestFixture, TestSymbolicEmbedder) { + // Create a test vocabulary with specific values + auto VocabOrErr = createTestVocab( + { + {"NOOP", 1.0f}, // [1.0, 1.0, 1.0, 1.0] + {"RET", 2.0f}, // [2.0, 2.0, 2.0, 2.0] + {"TRAP", 3.0f} // [3.0, 3.0, 3.0, 3.0] + }, + 4); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &Vocab = *VocabOrErr; + // Create a basic block using fixture's MF + MachineBasicBlock *MBB = MF->CreateMachineBasicBlock(); + MF->push_back(MBB); + + // Use real X86 opcodes that should exist and not be pseudo + auto NoopInst = createMachineInstr(*MBB, "NOOP"); + 
ASSERT_TRUE(NoopInst != nullptr); + + auto RetInst = createMachineInstr(*MBB, "RET64"); + ASSERT_TRUE(RetInst != nullptr); + + auto TrapInst = createMachineInstr(*MBB, "TRAP"); + ASSERT_TRUE(TrapInst != nullptr); + + // Verify these are not pseudo instructions + ASSERT_FALSE(NoopInst->isPseudo()) << "NOOP is marked as pseudo instruction"; + ASSERT_FALSE(RetInst->isPseudo()) << "RET is marked as pseudo instruction"; + ASSERT_FALSE(TrapInst->isPseudo()) << "TRAP is marked as pseudo instruction"; + + // Create embedder + auto Embedder = SymbolicMIREmbedder::create(*MF, Vocab); + ASSERT_TRUE(Embedder != nullptr); + + // Test instruction embeddings + auto NoopEmb = Embedder->getMInstVector(*NoopInst); + auto RetEmb = Embedder->getMInstVector(*RetInst); + auto TrapEmb = Embedder->getMInstVector(*TrapInst); + + // Verify embeddings match expected values (accounting for weight scaling) + float ExpectedWeight = mir2vec::OpcWeight; // Global weight from command line + EXPECT_TRUE(NoopEmb.approximatelyEquals(Embedding(4, 1.0f * ExpectedWeight))); + EXPECT_TRUE(RetEmb.approximatelyEquals(Embedding(4, 2.0f * ExpectedWeight))); + EXPECT_TRUE(TrapEmb.approximatelyEquals(Embedding(4, 3.0f * ExpectedWeight))); + + // Test basic block embedding (should be sum of instruction embeddings) + auto MBBVector = Embedder->getMBBVector(*MBB); + + // Expected BB vector: NOOP + RET + TRAP = [1+2+3, 1+2+3, 1+2+3, 1+2+3] * + // weight = [6, 6, 6, 6] * weight + Embedding ExpectedMBBVector(4, 6.0f * ExpectedWeight); + EXPECT_TRUE(MBBVector.approximatelyEquals(ExpectedMBBVector)); + + // Test function embedding (should equal MBB embedding since we have one MBB) + auto MFuncVector = Embedder->getMFunctionVector(); + EXPECT_TRUE(MFuncVector.approximatelyEquals(ExpectedMBBVector)); +} + +// Test embedder with multiple basic blocks +TEST_F(MIR2VecEmbeddingTestFixture, MultipleBasicBlocks) { + // Create a test vocabulary + auto VocabOrErr = createTestVocab({{"NOOP", 1.0f}, {"TRAP", 2.0f}}); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &Vocab = *VocabOrErr; + + // Create two basic blocks using fixture's MF + MachineBasicBlock *MBB1 = MF->CreateMachineBasicBlock(); + MachineBasicBlock *MBB2 = MF->CreateMachineBasicBlock(); + MF->push_back(MBB1); + MF->push_back(MBB2); + + createMachineInstrs(*MBB1, {"NOOP", "NOOP"}); + createMachineInstr(*MBB2, "TRAP"); + + // Create embedder + auto Embedder = SymbolicMIREmbedder::create(*MF, Vocab); + ASSERT_TRUE(Embedder != nullptr); + + // Test basic block embeddings + auto MBB1Vector = Embedder->getMBBVector(*MBB1); + auto MBB2Vector = Embedder->getMBBVector(*MBB2); + + float ExpectedWeight = mir2vec::OpcWeight; + // BB1: NOOP + NOOP = 2 * ([1, 1] * weight) + Embedding ExpectedMBB1Vector(2, 2.0f * ExpectedWeight); + EXPECT_TRUE(MBB1Vector.approximatelyEquals(ExpectedMBB1Vector)); + + // BB2: TRAP = [2, 2] * weight + Embedding ExpectedMBB2Vector(2, 2.0f * ExpectedWeight); + EXPECT_TRUE(MBB2Vector.approximatelyEquals(ExpectedMBB2Vector)); + + // Function embedding should be just the first BB embedding ([2, 2] * weight), + // since the second BB has no predecessors and is unreachable at this point + auto MFuncVector = Embedder->getMFunctionVector(); + EXPECT_TRUE(MFuncVector.approximatelyEquals(ExpectedMBB1Vector)); + + // Add a branch from BB1 to BB2 to make both reachable; now function embedding + // should be MBB1 + MBB2 + MBB1->addSuccessor(MBB2); + auto NewMFuncVector =
Embedder->getMFunctionVector(); // Recompute embeddings + Embedding ExpectedFuncVector = MBB1Vector + MBB2Vector; + EXPECT_TRUE(NewMFuncVector.approximatelyEquals(ExpectedFuncVector)); +} + +// Test embedder with empty basic block +TEST_F(MIR2VecEmbeddingTestFixture, EmptyBasicBlock) { + + // Create an empty basic block + MachineBasicBlock *MBB = MF->CreateMachineBasicBlock(); + MF->push_back(MBB); + + // Create embedder + auto VocabOrErr = MIRVocabulary::createDummyVocabForTest(*TII, 2); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &V = *VocabOrErr; + auto Embedder = SymbolicMIREmbedder::create(*MF, V); + ASSERT_TRUE(Embedder != nullptr); + + // Test that empty BB has zero embedding + auto MBBVector = Embedder->getMBBVector(*MBB); + Embedding ExpectedBBVector(2, 0.0f); + EXPECT_TRUE(MBBVector.approximatelyEquals(ExpectedBBVector)); + + // Function embedding should also be zero + auto MFuncVector = Embedder->getMFunctionVector(); + EXPECT_TRUE(MFuncVector.approximatelyEquals(ExpectedBBVector)); +} + +// Test embedder with opcodes not in vocabulary +TEST_F(MIR2VecEmbeddingTestFixture, UnknownOpcodes) { + // Create a test vocabulary with limited entries + // SUB is intentionally not included + auto VocabOrErr = createTestVocab({{"ADD", 1.0f}}); + ASSERT_TRUE(static_cast<bool>(VocabOrErr)) + << "Failed to create vocabulary: " << toString(VocabOrErr.takeError()); + auto &Vocab = *VocabOrErr; + + // Create a basic block + MachineBasicBlock *MBB = MF->CreateMachineBasicBlock(); + MF->push_back(MBB); + + // Find opcodes + int AddOpcode = findOpcodeByName("ADD32rr"); + int SubOpcode = findOpcodeByName("SUB32rr"); + + ASSERT_NE(AddOpcode, -1) << "ADD32rr opcode not found"; + ASSERT_NE(SubOpcode, -1) << "SUB32rr opcode not found"; + + // Create instructions + MachineInstr *AddInstr = createMachineInstr(*MBB, AddOpcode); + MachineInstr *SubInstr = createMachineInstr(*MBB, SubOpcode); + + // Create embedder + auto Embedder = SymbolicMIREmbedder::create(*MF, Vocab); + ASSERT_TRUE(Embedder != nullptr); + + // Test instruction embeddings + auto AddVector = Embedder->getMInstVector(*AddInstr); + auto SubVector = Embedder->getMInstVector(*SubInstr); + + float ExpectedWeight = mir2vec::OpcWeight; + // ADD should have the embedding from vocabulary + EXPECT_TRUE( + AddVector.approximatelyEquals(Embedding(2, 1.0f * ExpectedWeight))); + + // SUB should have zero embedding (not in vocabulary) + EXPECT_TRUE(SubVector.approximatelyEquals(Embedding(2, 0.0f))); + + // Basic block embedding should be ADD + SUB = [1.0, 1.0] * weight + [0.0, + // 0.0] = [1.0, 1.0] * weight + const auto &MBBVector = Embedder->getMBBVector(*MBB); + Embedding ExpectedBBVector(2, 1.0f * ExpectedWeight); + EXPECT_TRUE(MBBVector.approximatelyEquals(ExpectedBBVector)); +} } // namespace diff --git a/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp b/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp index b988a78a..08b4e8f 100644 --- a/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp @@ -54,7 +54,9 @@ protected: return ContainerElementsMap(); ContainerElementsMap Result = SNs[0]->defs(); +#ifndef NDEBUG const ContainerElementsMap &Deps = SNs[0]->deps(); +#endif // NDEBUG for (size_t I = 1; I != SNs.size(); ++I) { assert(!DepsMustMatch || SNs[I]->deps() == Deps); diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp 
b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp index 2a0f500..e108c4d 100644 --- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp @@ -33,7 +33,7 @@ TEST_F(VPDominatorTreeTest, DominanceNoRegionsTest) { VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("VPBB2"); VPBasicBlock *VPBB3 = Plan.createVPBasicBlock("VPBB3"); VPBasicBlock *VPBB4 = Plan.createVPBasicBlock("VPBB4"); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB1, VPBB4); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB1, VPBB4); VPBB2->setParent(R1); VPBB3->setParent(R1); @@ -99,7 +99,7 @@ TEST_F(VPDominatorTreeTest, DominanceRegionsTest) { VPBasicBlock *R1BB2 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB3 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB4 = Plan.createVPBasicBlock(""); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB4, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB4); R1BB2->setParent(R1); R1BB3->setParent(R1); VPBlockUtils::connectBlocks(VPBB0, R1); @@ -112,7 +112,7 @@ TEST_F(VPDominatorTreeTest, DominanceRegionsTest) { VPBasicBlock *R2BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R1, R2); @@ -171,12 +171,12 @@ TEST_F(VPDominatorTreeTest, DominanceRegionsTest) { VPBasicBlock *R1BB1 = Plan.createVPBasicBlock("R1BB1"); VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("R1BB2"); VPBasicBlock *R1BB3 = Plan.createVPBasicBlock("R1BB3"); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB3, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB3); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1"); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("R2BB2"); VPBasicBlock *R2BB3 = Plan.createVPBasicBlock("R2BB#"); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB3, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB3); R2BB2->setParent(R2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB2, R2BB1); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index db64c75..c1791dfa 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -269,7 +269,7 @@ TEST_F(VPBasicBlockTest, getPlan) { // VPBasicBlock is the entry into the VPlan, followed by a region. 
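+  // Reading of the helper used below, as an assumption from its call sites: +  // createLoopRegion(Name, Entry, Exiting) builds a region with the given +  // entry and exiting blocks, while blocks connected inside the region still +  // need an explicit setParent call.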
VPBasicBlock *R1BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB2); VPBlockUtils::connectBlocks(R1BB1, R1BB2); VPBlockUtils::connectBlocks(VPBB1, R1); @@ -286,12 +286,12 @@ TEST_F(VPBasicBlockTest, getPlan) { VPlan &Plan = getPlan(); VPBasicBlock *R1BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB2); VPBlockUtils::connectBlocks(R1BB1, R1BB2); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBasicBlock *VPBB1 = Plan.getEntry(); @@ -369,7 +369,7 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { VPBasicBlock *R1BB2 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB3 = Plan.createVPBasicBlock(""); VPBasicBlock *R1BB4 = Plan.createVPBasicBlock(""); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB4, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB4); R1BB2->setParent(R1); R1BB3->setParent(R1); VPBlockUtils::connectBlocks(VPBB0, R1); @@ -382,7 +382,7 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { VPBasicBlock *R2BB1 = Plan.createVPBasicBlock(""); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock(""); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R1, R2); @@ -467,12 +467,12 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { VPBasicBlock *R1BB1 = Plan.createVPBasicBlock("R1BB1"); VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("R1BB2"); VPBasicBlock *R1BB3 = Plan.createVPBasicBlock("R1BB3"); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB3, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB3); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1"); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("R2BB2"); VPBasicBlock *R2BB3 = Plan.createVPBasicBlock("R2BB3"); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB3, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB3); R2BB2->setParent(R2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB2, R2BB1); @@ -537,10 +537,10 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { VPlan &Plan = getPlan(); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1"); VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("R2BB2"); - VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2); VPBlockUtils::connectBlocks(R2BB1, R2BB2); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R2, R2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R2, R2); R2->setParent(R1); VPBasicBlock *VPBB1 = Plan.getEntry(); @@ -590,14 +590,14 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) { // VPlan &Plan = getPlan(); VPBasicBlock *R3BB1 = Plan.createVPBasicBlock("R3BB1"); - VPRegionBlock *R3 = Plan.createVPRegionBlock(R3BB1, R3BB1, "R3"); + VPRegionBlock *R3 = Plan.createLoopRegion("R3", R3BB1, R3BB1); VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1"); - VPRegionBlock *R2 = 
Plan.createVPRegionBlock(R2BB1, R3, "R2"); + VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R3); R3->setParent(R2); VPBlockUtils::connectBlocks(R2BB1, R3); - VPRegionBlock *R1 = Plan.createVPRegionBlock(R2, R2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", R2, R2); R2->setParent(R1); VPBasicBlock *VPBB1 = Plan.getEntry(); @@ -687,7 +687,7 @@ TEST_F(VPBasicBlockTest, reassociateBlocks) { VPlan &Plan = getPlan(); VPBasicBlock *VPBB1 = Plan.createVPBasicBlock("VPBB1"); VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("VPBB2"); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); auto *WidenPhi = new VPWidenPHIRecipe(nullptr); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp index c2f045b..50ad4d5 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -32,7 +32,7 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefSameBB) { VPBasicBlock *VPBB2 = Plan.createVPBasicBlock(""); VPBB2->appendRecipe(CanIV); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader()); @@ -71,7 +71,7 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) { VPBB2->appendRecipe(DefI); VPBB2->appendRecipe(BranchOnCond); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader()); @@ -117,7 +117,7 @@ TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) { VPBlockUtils::connectBlocks(VPBB2, VPBB3); VPBlockUtils::connectBlocks(VPBB3, VPBB4); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB4, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB4); VPBlockUtils::connectBlocks(VPBB1, R1); VPBB3->setParent(R1); @@ -160,7 +160,7 @@ TEST_F(VPVerifierTest, VPPhiIncomingValueDoesntDominateIncomingBlock) { auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {}); VPBB3->appendRecipe(CanIV); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB3, VPBB3, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB3, VPBB3); VPBlockUtils::connectBlocks(VPBB1, VPBB2); VPBlockUtils::connectBlocks(VPBB2, R1); VPBlockUtils::connectBlocks(VPBB4, Plan.getScalarHeader()); @@ -200,7 +200,7 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsOutsideRegion) { VPBB2->appendRecipe(CanIV); VPBB2->appendRecipe(BranchOnCond); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(VPBB1, R1); @@ -237,7 +237,7 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsInsideRegion) { VPBlockUtils::connectBlocks(VPBB2, VPBB3); VPBlockUtils::connectBlocks(VPBB2, VPBB3); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB3, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB3); VPBlockUtils::connectBlocks(VPBB1, R1); VPBB3->setParent(R1); @@ -270,7 +270,7 @@ TEST_F(VPVerifierTest, BlockOutsideRegionWithParent) { VPBB1->appendRecipe(DefI); VPBB2->appendRecipe(BranchOnCond); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock 
*R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader()); @@ -302,7 +302,7 @@ TEST_F(VPVerifierTest, NonHeaderPHIInHeader) { VPBB2->appendRecipe(IRPhi); VPBB2->appendRecipe(BranchOnCond); - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1"); + VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2); VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader()); diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn index 5efc153..51911d7 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn @@ -47,7 +47,6 @@ static_library("Clang") { "ClangASTImporter.cpp", "ClangASTMetadata.cpp", "ClangASTSource.cpp", - "ClangDeclVendor.cpp", "ClangExpressionDeclMap.cpp", "ClangExpressionHelper.cpp", "ClangExpressionParser.cpp", diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td index f693a07..d9882cb 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td @@ -101,6 +101,60 @@ def ClauseRequires : OpenMP_BitEnumAttr< def ClauseRequiresAttr : OpenMP_EnumAttr<ClauseRequires, "clause_requires">; + +//===----------------------------------------------------------------------===// +// clause_map_flags enum. +//===----------------------------------------------------------------------===// + +def ClauseMapFlagsNone : I32BitEnumAttrCaseNone<"none">; +def ClauseMapFlagsStorage : I32BitEnumAttrCaseBit<"storage", 0>; // alloc/release synonym +def ClauseMapFlagsTo : I32BitEnumAttrCaseBit<"to", 1>; +def ClauseMapFlagsFrom : I32BitEnumAttrCaseBit<"from", 2>; +def ClauseMapFlagsAlways : I32BitEnumAttrCaseBit<"always", 3>; +def ClauseMapFlagsDelete : I32BitEnumAttrCaseBit<"del", 4>; // `delete` is reserved by C/C++ +def ClauseMapFlagsReturnParam : I32BitEnumAttrCaseBit<"return_param", 5>; +def ClauseMapFlagsPrivate : I32BitEnumAttrCaseBit<"priv", 6>; // `private` is reserved by C/C++ +def ClauseMapFlagsLiteral : I32BitEnumAttrCaseBit<"literal", 7>; +def ClauseMapFlagsImplicit : I32BitEnumAttrCaseBit<"implicit", 8>; +def ClauseMapFlagsClose : I32BitEnumAttrCaseBit<"close", 9>; +def ClauseMapFlagsPresent : I32BitEnumAttrCaseBit<"present", 10>; +def ClauseMapFlagsOMPXHold : I32BitEnumAttrCaseBit<"ompx_hold", 11>; +def ClauseMapFlagsAttach : I32BitEnumAttrCaseBit<"attach", 12>; +def ClauseMapFlagsAttachAlways : I32BitEnumAttrCaseBit<"attach_always", 13>; +def ClauseMapFlagsAttachNone : I32BitEnumAttrCaseBit<"attach_none", 14>; +def ClauseMapFlagsAttachAuto : I32BitEnumAttrCaseBit<"attach_auto", 15>; +def ClauseMapFlagsRefPtr : I32BitEnumAttrCaseBit<"ref_ptr", 16>; +def ClauseMapFlagsRefPtee : I32BitEnumAttrCaseBit<"ref_ptee", 17>; +def ClauseMapFlagsRefPtrPtee : I32BitEnumAttrCaseBit<"ref_ptr_ptee", 18>; + +def ClauseMapFlags : OpenMP_BitEnumAttr< + "ClauseMapFlags", + "Map types and modifiers tied to data maps", [ + ClauseMapFlagsNone, + ClauseMapFlagsStorage, + ClauseMapFlagsTo, + ClauseMapFlagsFrom, + ClauseMapFlagsAlways, + ClauseMapFlagsDelete, + ClauseMapFlagsReturnParam, + ClauseMapFlagsPrivate, + ClauseMapFlagsLiteral, + ClauseMapFlagsImplicit, + ClauseMapFlagsClose, + ClauseMapFlagsPresent, + ClauseMapFlagsOMPXHold, + ClauseMapFlagsAttach,
+ ClauseMapFlagsAttachAlways, + ClauseMapFlagsAttachNone, + ClauseMapFlagsAttachAuto, + ClauseMapFlagsRefPtr, + ClauseMapFlagsRefPtee, + ClauseMapFlagsRefPtrPtee + ]>; + +def ClauseMapFlagsAttr : OpenMP_EnumAttr<ClauseMapFlags, + "clause_map_flags">; + //===----------------------------------------------------------------------===// // clause_task_depend enum. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index b73091e..377f1fe 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1198,7 +1198,7 @@ def MapBoundsOp : OpenMP_Op<"map.bounds", def MapInfoOp : OpenMP_Op<"map.info", [AttrSizedOperandSegments]> { let arguments = (ins OpenMP_PointerLikeType:$var_ptr, TypeAttr:$var_type, - UI64Attr:$map_type, + ClauseMapFlagsAttr:$map_type, VariableCaptureKindAttr:$map_capture_type, Optional<OpenMP_PointerLikeType>:$var_ptr_ptr, Variadic<OpenMP_PointerLikeType>:$members, diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index fd4cabbad..1b069c6 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -32,7 +32,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/ADT/bit.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Support/InterleavedRange.h" #include <cstddef> #include <iterator> @@ -1737,10 +1736,10 @@ static LogicalResult verifySynchronizationHint(Operation *op, uint64_t hint) { // Parser, printer and verifier for Target //===----------------------------------------------------------------------===// -// Helper function to get bitwise AND of `value` and 'flag' -static uint64_t mapTypeToBitFlag(uint64_t value, - llvm::omp::OpenMPOffloadMappingFlags flag) { - return value & llvm::to_underlying(flag); +// Helper function that computes the bitwise AND of `value` and `flag` and +// returns true if every bit of `flag` is set +static bool mapTypeToBool(ClauseMapFlags value, ClauseMapFlags flag) { + return (value & flag) == flag; } /// Parses a map_entries map type from a string format back into its numeric /// value. /// /// map-clause = `map_clauses ( ( `(` `always, `? `implicit, `? `ompx_hold, `? `close, `? `present, `?
( `to` | `from` | `delete` `)` )+ `)` ) -static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) { - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE; - +static ParseResult parseMapClause(OpAsmParser &parser, + ClauseMapFlagsAttr &mapType) { + ClauseMapFlags mapTypeBits = ClauseMapFlags::none; // This simply verifies that the correct keyword is read in; the // keyword itself is stored inside the operation auto parseTypeAndMod = [&]() -> ParseResult { @@ -1760,35 +1758,64 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) { return failure(); if (mapTypeMod == "always") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; + mapTypeBits |= ClauseMapFlags::always; if (mapTypeMod == "implicit") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mapTypeBits |= ClauseMapFlags::implicit; if (mapTypeMod == "ompx_hold") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; + mapTypeBits |= ClauseMapFlags::ompx_hold; if (mapTypeMod == "close") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE; + mapTypeBits |= ClauseMapFlags::close; if (mapTypeMod == "present") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; + mapTypeBits |= ClauseMapFlags::present; if (mapTypeMod == "to") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapTypeBits |= ClauseMapFlags::to; if (mapTypeMod == "from") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + mapTypeBits |= ClauseMapFlags::from; if (mapTypeMod == "tofrom") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + mapTypeBits |= ClauseMapFlags::to | ClauseMapFlags::from; if (mapTypeMod == "delete") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE; + mapTypeBits |= ClauseMapFlags::del; + + if (mapTypeMod == "storage") + mapTypeBits |= ClauseMapFlags::storage; if (mapTypeMod == "return_param") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + mapTypeBits |= ClauseMapFlags::return_param; + + if (mapTypeMod == "private") + mapTypeBits |= ClauseMapFlags::priv; + + if (mapTypeMod == "literal") + mapTypeBits |= ClauseMapFlags::literal; + + if (mapTypeMod == "attach") + mapTypeBits |= ClauseMapFlags::attach; + + if (mapTypeMod == "attach_always") + mapTypeBits |= ClauseMapFlags::attach_always; + + if (mapTypeMod == "attach_none") + mapTypeBits |= ClauseMapFlags::attach_none; + + if (mapTypeMod == "attach_auto") + mapTypeBits |= ClauseMapFlags::attach_auto; + + if (mapTypeMod == "ref_ptr") + mapTypeBits |= ClauseMapFlags::ref_ptr; + + if (mapTypeMod == "ref_ptee") + mapTypeBits |= ClauseMapFlags::ref_ptee; + + if (mapTypeMod == "ref_ptr_ptee") + mapTypeBits |= ClauseMapFlags::ref_ptr_ptee; return success(); }; @@ -1796,9 +1823,8 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) { if (parser.parseCommaSeparatedList(parseTypeAndMod)) return failure(); - mapType = parser.getBuilder().getIntegerAttr( - parser.getBuilder().getIntegerType(64, /*isSigned=*/false), - llvm::to_underlying(mapTypeBits)); + mapType = + parser.getBuilder().getAttr<mlir::omp::ClauseMapFlagsAttr>(mapTypeBits); return success(); } @@ -1806,60 +1832,62 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) { /// Prints a map_entries map type from its numeric value out into its string ///
format. static void printMapClause(OpAsmPrinter &p, Operation *op, - IntegerAttr mapType) { - uint64_t mapTypeBits = mapType.getUInt(); - - bool emitAllocRelease = true; + ClauseMapFlagsAttr mapType) { llvm::SmallVector<std::string, 4> mapTypeStrs; + ClauseMapFlags mapFlags = mapType.getValue(); // handling of always, close, present placed at the beginning of the string // to aid readability - if (mapTypeToBitFlag(mapTypeBits, - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS)) + if (mapTypeToBool(mapFlags, ClauseMapFlags::always)) mapTypeStrs.push_back("always"); - if (mapTypeToBitFlag(mapTypeBits, - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)) + if (mapTypeToBool(mapFlags, ClauseMapFlags::implicit)) mapTypeStrs.push_back("implicit"); - if (mapTypeToBitFlag(mapTypeBits, - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD)) + if (mapTypeToBool(mapFlags, ClauseMapFlags::ompx_hold)) mapTypeStrs.push_back("ompx_hold"); - if (mapTypeToBitFlag(mapTypeBits, - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE)) + if (mapTypeToBool(mapFlags, ClauseMapFlags::close)) mapTypeStrs.push_back("close"); - if (mapTypeToBitFlag(mapTypeBits, - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) + if (mapTypeToBool(mapFlags, ClauseMapFlags::present)) mapTypeStrs.push_back("present"); // special handling of to/from/tofrom/delete and release/alloc, release + // alloc are the absence of one of the other flags, whereas tofrom requires // both the to and from flags to be set. - bool to = mapTypeToBitFlag(mapTypeBits, - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO); - bool from = mapTypeToBitFlag( - mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM); - if (to && from) { - emitAllocRelease = false; + bool to = mapTypeToBool(mapFlags, ClauseMapFlags::to); + bool from = mapTypeToBool(mapFlags, ClauseMapFlags::from); + + if (to && from) mapTypeStrs.push_back("tofrom"); - } else if (from) { - emitAllocRelease = false; + else if (from) mapTypeStrs.push_back("from"); - } else if (to) { - emitAllocRelease = false; + else if (to) mapTypeStrs.push_back("to"); - } - if (mapTypeToBitFlag(mapTypeBits, - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE)) { - emitAllocRelease = false; + + if (mapTypeToBool(mapFlags, ClauseMapFlags::del)) mapTypeStrs.push_back("delete"); - } - if (mapTypeToBitFlag( - mapTypeBits, - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM)) { - emitAllocRelease = false; + if (mapTypeToBool(mapFlags, ClauseMapFlags::return_param)) mapTypeStrs.push_back("return_param"); - } - if (emitAllocRelease) - mapTypeStrs.push_back("exit_release_or_enter_alloc"); + if (mapTypeToBool(mapFlags, ClauseMapFlags::storage)) + mapTypeStrs.push_back("storage"); + if (mapTypeToBool(mapFlags, ClauseMapFlags::priv)) + mapTypeStrs.push_back("private"); + if (mapTypeToBool(mapFlags, ClauseMapFlags::literal)) + mapTypeStrs.push_back("literal"); + if (mapTypeToBool(mapFlags, ClauseMapFlags::attach)) + mapTypeStrs.push_back("attach"); + if (mapTypeToBool(mapFlags, ClauseMapFlags::attach_always)) + mapTypeStrs.push_back("attach_always"); + if (mapTypeToBool(mapFlags, ClauseMapFlags::attach_none)) + mapTypeStrs.push_back("attach_none"); + if (mapTypeToBool(mapFlags, ClauseMapFlags::attach_auto)) + mapTypeStrs.push_back("attach_auto"); + if (mapTypeToBool(mapFlags, ClauseMapFlags::ref_ptr)) + mapTypeStrs.push_back("ref_ptr"); + if (mapTypeToBool(mapFlags, ClauseMapFlags::ref_ptee)) + mapTypeStrs.push_back("ref_ptee"); + if (mapTypeToBool(mapFlags,
ClauseMapFlags::ref_ptr_ptee)) + mapTypeStrs.push_back("ref_ptr_ptee"); + if (mapFlags == ClauseMapFlags::none) + mapTypeStrs.push_back("none"); for (unsigned int i = 0; i < mapTypeStrs.size(); ++i) { p << mapTypeStrs[i]; @@ -1963,21 +1991,15 @@ static LogicalResult verifyMapClause(Operation *op, OperandRange mapVars) { return emitError(op->getLoc(), "missing map operation"); if (auto mapInfoOp = mapOp.getDefiningOp<mlir::omp::MapInfoOp>()) { - uint64_t mapTypeBits = mapInfoOp.getMapType(); - - bool to = mapTypeToBitFlag( - mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO); - bool from = mapTypeToBitFlag( - mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM); - bool del = mapTypeToBitFlag( - mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE); - - bool always = mapTypeToBitFlag( - mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS); - bool close = mapTypeToBitFlag( - mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); - bool implicit = mapTypeToBitFlag( - mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); + mlir::omp::ClauseMapFlags mapTypeBits = mapInfoOp.getMapType(); + + bool to = mapTypeToBool(mapTypeBits, ClauseMapFlags::to); + bool from = mapTypeToBool(mapTypeBits, ClauseMapFlags::from); + bool del = mapTypeToBool(mapTypeBits, ClauseMapFlags::del); + + bool always = mapTypeToBool(mapTypeBits, ClauseMapFlags::always); + bool close = mapTypeToBool(mapTypeBits, ClauseMapFlags::close); + bool implicit = mapTypeToBool(mapTypeBits, ClauseMapFlags::implicit); if ((isa<TargetDataOp>(op) || isa<TargetOp>(op)) && del) return emitError(op->getLoc(), diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp index 4d81918..776b5c6 100644 --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -378,10 +378,8 @@ struct SourceMgrDiagnosticHandlerImpl { } // Otherwise, try to load the source file. - auto bufferOrErr = llvm::MemoryBuffer::getFile(filename); - if (!bufferOrErr) - return 0; - unsigned id = mgr.AddNewSourceBuffer(std::move(*bufferOrErr), SMLoc()); + std::string ignored; + unsigned id = mgr.AddIncludeFile(std::string(filename), SMLoc(), ignored); filenameToBufId[filename] = id; return id; } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 8de49dd..b851414 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3833,6 +3833,58 @@ static llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type, return builder.getInt64(dl.getTypeSizeInBits(type) / 8); } +// Convert the MLIR map flag set to the runtime map flag set for embedding +// in LLVM-IR. This is important as the two bit-flag lists do not correspond +// 1-to-1: there are flags the runtime doesn't care about and vice versa. +// Certain flags are discarded here, such as RefPtee and related flags.
+static llvm::omp::OpenMPOffloadMappingFlags +convertClauseMapFlags(omp::ClauseMapFlags mlirFlags) { + auto mapTypeToBool = [&mlirFlags](omp::ClauseMapFlags flag) { + return (mlirFlags & flag) == flag; + }; + + llvm::omp::OpenMPOffloadMappingFlags mapType = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE; + + if (mapTypeToBool(omp::ClauseMapFlags::to)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + + if (mapTypeToBool(omp::ClauseMapFlags::from)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + + if (mapTypeToBool(omp::ClauseMapFlags::always)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; + + if (mapTypeToBool(omp::ClauseMapFlags::del)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE; + + if (mapTypeToBool(omp::ClauseMapFlags::return_param)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + + if (mapTypeToBool(omp::ClauseMapFlags::priv)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE; + + if (mapTypeToBool(omp::ClauseMapFlags::literal)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL; + + if (mapTypeToBool(omp::ClauseMapFlags::implicit)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + + if (mapTypeToBool(omp::ClauseMapFlags::close)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE; + + if (mapTypeToBool(omp::ClauseMapFlags::present)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; + + if (mapTypeToBool(omp::ClauseMapFlags::ompx_hold)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; + + if (mapTypeToBool(omp::ClauseMapFlags::attach)) + mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH; + + return mapType; +} + static void collectMapDataFromMapOperands( MapInfoData &mapData, SmallVectorImpl<Value> &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, @@ -3880,8 +3932,7 @@ static void collectMapDataFromMapOperands( getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(), mapData.BaseType.back(), builder, moduleTranslation)); mapData.MapClause.push_back(mapOp.getOperation()); - mapData.Types.push_back( - llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType())); + mapData.Types.push_back(convertClauseMapFlags(mapOp.getMapType())); mapData.Names.push_back(LLVM::createMappingInformation( mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder())); mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None); @@ -3950,8 +4001,7 @@ static void collectMapDataFromMapOperands( Value offloadPtr = mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr(); llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr); - auto mapType = - static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType()); + auto mapType = convertClauseMapFlags(mapOp.getMapType()); auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; mapData.OriginalValue.push_back(origValue); @@ -4299,8 +4349,7 @@ static void processMapMembersWithParent( // in part as we currently have substantially less information on the data // being mapped at this stage. 
if (checkIfPointerMap(memberClause)) { - auto mapFlag = - llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType()); + auto mapFlag = convertClauseMapFlags(memberClause.getMapType()); mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag); @@ -4319,8 +4368,7 @@ static void processMapMembersWithParent( // Same MemberOfFlag to indicate its link with parent and other members // of. - auto mapFlag = - llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType()); + auto mapFlag = convertClauseMapFlags(memberClause.getMapType()); mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag); diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index 7d8ccd9..f2fbe91 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -216,22 +216,22 @@ func.func @task_depend(%arg0: !llvm.ptr) { // CHECK: (%[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr, %[[ARG2:.*]]: !llvm.ptr, %[[ARG3:.*]]: !llvm.ptr) // CHECK: %[[MAP0:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} -// CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG2]] : !llvm.ptr, i32) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""} +// CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG2]] : !llvm.ptr, i32) map_clauses(always, storage) capture(ByRef) -> !llvm.ptr {name = ""} // CHECK: omp.target_enter_data map_entries(%[[MAP0]], %[[MAP1]], %[[MAP2]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) // CHECK: %[[MAP3:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} // CHECK: %[[MAP4:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} -// CHECK: %[[MAP5:.*]] = omp.map.info var_ptr(%[[ARG2]] : !llvm.ptr, i32) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""} +// CHECK: %[[MAP5:.*]] = omp.map.info var_ptr(%[[ARG2]] : !llvm.ptr, i32) map_clauses(storage) capture(ByRef) -> !llvm.ptr {name = ""} // CHECK: %[[MAP6:.*]] = omp.map.info var_ptr(%[[ARG3]] : !llvm.ptr, i32) map_clauses(always, delete) capture(ByRef) -> !llvm.ptr {name = ""} // CHECK: omp.target_exit_data map_entries(%[[MAP3]], %[[MAP4]], %[[MAP5]], %[[MAP6]] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) llvm.func @_QPomp_target_data(%a : !llvm.ptr, %b : !llvm.ptr, %c : !llvm.ptr, %d : !llvm.ptr) { %0 = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} %1 = omp.map.info var_ptr(%b : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} - %2 = omp.map.info var_ptr(%c : !llvm.ptr, i32) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""} + %2 = omp.map.info var_ptr(%c : !llvm.ptr, i32) map_clauses(always, storage) capture(ByRef) -> !llvm.ptr {name = ""} omp.target_enter_data map_entries(%0, %1, %2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {} %3 = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(from) 
capture(ByRef) -> !llvm.ptr {name = ""} %4 = omp.map.info var_ptr(%b : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - %5 = omp.map.info var_ptr(%c : !llvm.ptr, i32) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""} + %5 = omp.map.info var_ptr(%c : !llvm.ptr, i32) map_clauses(storage) capture(ByRef) -> !llvm.ptr {name = ""} %6 = omp.map.info var_ptr(%d : !llvm.ptr, i32) map_clauses(always, delete) capture(ByRef) -> !llvm.ptr {name = ""} omp.target_exit_data map_entries(%3, %4, %5, %6 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {} llvm.return @@ -266,7 +266,7 @@ llvm.func @_QPomp_target_data_region(%a : !llvm.ptr, %i : !llvm.ptr) { // CHECK: %[[ARG_1:.*]]: !llvm.ptr) { // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(64 : i32) : i32 // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} -// CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG_1]] : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = ""} +// CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG_1]] : !llvm.ptr, i32) map_clauses(implicit, storage) capture(ByCopy) -> !llvm.ptr {name = ""} // CHECK: omp.target thread_limit(%[[VAL_0]] : i32) map_entries(%[[MAP1]] -> %[[BB_ARG0:.*]], %[[MAP2]] -> %[[BB_ARG1:.*]] : !llvm.ptr, !llvm.ptr) { // CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(10 : i32) : i32 // CHECK: llvm.store %[[VAL_1]], %[[BB_ARG1]] : i32, !llvm.ptr @@ -278,7 +278,7 @@ llvm.func @_QPomp_target_data_region(%a : !llvm.ptr, %i : !llvm.ptr) { llvm.func @_QPomp_target(%a : !llvm.ptr, %i : !llvm.ptr) { %0 = llvm.mlir.constant(64 : i32) : i32 %1 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %3 = omp.map.info var_ptr(%i : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = ""} + %3 = omp.map.info var_ptr(%i : !llvm.ptr, i32) map_clauses(implicit, storage) capture(ByCopy) -> !llvm.ptr {name = ""} omp.target thread_limit(%0 : i32) map_entries(%1 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { %2 = llvm.mlir.constant(10 : i32) : i32 llvm.store %2, %arg1 : i32, !llvm.ptr diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index cbd863f..ac29e20 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -828,11 +828,11 @@ func.func @omp_target(%if_cond : i1, %device : si32, %num_threads : i32, %devic // Test with optional map clause. 
// CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(always, to) capture(ByRef) -> memref<?xi32> {name = ""} // CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""} - // CHECK: %[[MAP_C:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + // CHECK: %[[MAP_C:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} // CHECK: omp.target is_device_ptr(%[[VAL_4:.*]] : memref<i32>) has_device_addr(%[[MAP_A]] -> {{.*}} : memref<?xi32>) map_entries(%[[MAP_B]] -> {{.*}}, %[[MAP_C]] -> {{.*}} : memref<?xi32>, memref<?xi32>) { %mapv0 = omp.map.info var_ptr(%device_addr : memref<?xi32>, tensor<?xi32>) map_clauses(always, to) capture(ByRef) -> memref<?xi32> {name = ""} %mapv1 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""} - %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} omp.target is_device_ptr(%device_ptr : memref<i32>) has_device_addr(%mapv0 -> %arg0 : memref<?xi32>) map_entries(%mapv1 -> %arg1, %mapv2 -> %arg2 : memref<?xi32>, memref<?xi32>) { omp.terminator } @@ -868,20 +868,20 @@ func.func @omp_target_data (%if_cond : i1, %device : si32, %device_ptr: memref<i } // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""} - // CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + // CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} // CHECK: omp.target_data map_entries(%[[MAP_A]], %[[MAP_B]] : memref<?xi32>, memref<?xi32>) %mapv3 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""} - %mapv4 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv4 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} omp.target_data map_entries(%mapv3, %mapv4 : memref<?xi32>, memref<?xi32>) {} - // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} // CHECK: omp.target_enter_data device(%[[VAL_1:.*]] : si32) if(%[[VAL_0:.*]]) map_entries(%[[MAP_A]] : memref<?xi32>) nowait - %mapv5 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv5 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> 
memref<?xi32> {name = ""} omp.target_enter_data if(%if_cond) device(%device : si32) nowait map_entries(%mapv5 : memref<?xi32>) - // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} // CHECK: omp.target_exit_data device(%[[VAL_1:.*]] : si32) if(%[[VAL_0:.*]]) map_entries(%[[MAP_A]] : memref<?xi32>) nowait - %mapv6 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv6 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} omp.target_exit_data if(%if_cond) device(%device : si32) nowait map_entries(%mapv6 : memref<?xi32>) // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(ompx_hold, to) capture(ByRef) -> memref<?xi32> {name = ""} @@ -2790,13 +2790,13 @@ func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () // CHECK: %[[C_12:.*]] = llvm.mlir.constant(2 : index) : i64 // CHECK: %[[C_13:.*]] = llvm.mlir.constant(2 : index) : i64 // CHECK: %[[BOUNDS1:.*]] = omp.map.bounds lower_bound(%[[C_11]] : i64) upper_bound(%[[C_10]] : i64) stride(%[[C_12]] : i64) start_idx(%[[C_13]] : i64) - // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByCopy) mapper(@my_mapper) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""} + // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(ByCopy) mapper(@my_mapper) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""} %6 = llvm.mlir.constant(9 : index) : i64 %7 = llvm.mlir.constant(1 : index) : i64 %8 = llvm.mlir.constant(2 : index) : i64 %9 = llvm.mlir.constant(2 : index) : i64 %10 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) stride(%8 : i64) start_idx(%9 : i64) - %mapv2 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByCopy) mapper(@my_mapper) bounds(%10) -> !llvm.ptr {name = ""} + %mapv2 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(ByCopy) mapper(@my_mapper) bounds(%10) -> !llvm.ptr {name = ""} // CHECK: omp.target map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : !llvm.ptr, !llvm.ptr) omp.target map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr, !llvm.ptr) { @@ -2806,14 +2806,14 @@ func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () // CHECK: omp.target_data map_entries(%[[MAP0]], %[[MAP1]] : !llvm.ptr, !llvm.ptr) omp.target_data map_entries(%mapv1, %mapv2 : !llvm.ptr, !llvm.ptr){} - // CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(VLAType) bounds(%[[BOUNDS0]]) -> !llvm.ptr {name = ""} + // CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(VLAType) bounds(%[[BOUNDS0]]) -> !llvm.ptr {name = ""} // CHECK: omp.target_enter_data map_entries(%[[MAP2]] : !llvm.ptr) - %mapv3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) 
capture(VLAType) bounds(%4) -> !llvm.ptr {name = ""} + %mapv3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(VLAType) bounds(%4) -> !llvm.ptr {name = ""} omp.target_enter_data map_entries(%mapv3 : !llvm.ptr){} - // CHECK: %[[MAP3:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(This) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""} + // CHECK: %[[MAP3:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(This) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""} // CHECK: omp.target_exit_data map_entries(%[[MAP3]] : !llvm.ptr) - %mapv4 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(This) bounds(%10) -> !llvm.ptr {name = ""} + %mapv4 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(This) bounds(%10) -> !llvm.ptr {name = ""} omp.target_exit_data map_entries(%mapv4 : !llvm.ptr){} return @@ -2852,7 +2852,7 @@ func.func @omp_target_enter_update_exit_data_depend(%a: memref<?xi32>, %b: memre // CHECK-NEXT: [[MAP2:%.*]] = omp.map.info %map_a = omp.map.info var_ptr(%a: memref<?xi32>, tensor<?xi32>) map_clauses(to) capture(ByRef) -> memref<?xi32> %map_b = omp.map.info var_ptr(%b: memref<?xi32>, tensor<?xi32>) map_clauses(from) capture(ByRef) -> memref<?xi32> - %map_c = omp.map.info var_ptr(%c: memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> + %map_c = omp.map.info var_ptr(%c: memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> // Do some work on the host that writes to 'a' omp.task depend(taskdependout -> %a : memref<?xi32>) { @@ -3014,7 +3014,7 @@ func.func @parallel_op_reduction_and_private(%priv_var: !llvm.ptr, %priv_var2: ! 
// CHECK-LABEL: omp_target_private func.func @omp_target_private(%map1: memref<?xi32>, %map2: memref<?xi32>, %priv_var: !llvm.ptr) -> () { %mapv1 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""} - %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} // CHECK: omp.target // CHECK-SAME: private( @@ -3047,7 +3047,7 @@ func.func @omp_target_private(%map1: memref<?xi32>, %map2: memref<?xi32>, %priv_ // CHECK-LABEL: omp_target_private_with_map_idx func.func @omp_target_private_with_map_idx(%map1: memref<?xi32>, %map2: memref<?xi32>, %priv_var: !llvm.ptr) -> () { %mapv1 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""} - %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} // CHECK: omp.target @@ -3321,3 +3321,49 @@ func.func @omp_workdistribute() { } return } + +func.func @omp_target_map_clause_type_test(%arg0 : memref<?xi32>) -> () { + // Test new map clause additions + // CHECK: %{{.*}}map_clauses(none){{.*}} + // CHECK: %{{.*}}map_clauses(to){{.*}} + // CHECK: %{{.*}}map_clauses(from){{.*}} + // CHECK: %{{.*}}map_clauses(tofrom){{.*}} + // CHECK: %{{.*}}map_clauses(storage){{.*}} + // CHECK: %{{.*}}map_clauses(delete){{.*}} + // CHECK: %{{.*}}map_clauses(return_param){{.*}} + // CHECK: %{{.*}}map_clauses(private){{.*}} + // CHECK: %{{.*}}map_clauses(literal){{.*}} + // CHECK: %{{.*}}map_clauses(implicit){{.*}} + // CHECK: %{{.*}}map_clauses(close){{.*}} + // CHECK: %{{.*}}map_clauses(present){{.*}} + // CHECK: %{{.*}}map_clauses(ompx_hold){{.*}} + // CHECK: %{{.*}}map_clauses(attach){{.*}} + // CHECK: %{{.*}}map_clauses(attach_always){{.*}} + // CHECK: %{{.*}}map_clauses(attach_none){{.*}} + // CHECK: %{{.*}}map_clauses(attach_auto){{.*}} + // CHECK: %{{.*}}map_clauses(ref_ptr){{.*}} + // CHECK: %{{.*}}map_clauses(ref_ptee){{.*}} + // CHECK: %{{.*}}map_clauses(ref_ptr_ptee){{.*}} + %mapv0 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(none) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv1 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(to) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv2 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(from) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv3 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv4 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv5 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(delete) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv6 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(return_param) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv7 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(private) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv8 = omp.map.info var_ptr(%arg0 : 
memref<?xi32>, tensor<?xi32>) map_clauses(literal) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv9 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(implicit) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv10 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(close) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv11 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(present) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv12 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(ompx_hold) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv13 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(attach) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv14 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(attach_always) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv15 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(attach_none) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv16 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(attach_auto) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv17 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(ref_ptr) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv18 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(ref_ptee) capture(ByRef) -> memref<?xi32> {name = ""} + %mapv19 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(ref_ptr_ptee) capture(ByRef) -> memref<?xi32> {name = ""} + + return +} diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index ec4d762..8935975 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -18,6 +18,7 @@ #include "kmp_config.h" #if KMP_ARCH_X86 || KMP_ARCH_X86_64 +.att_syntax # if defined(__ELF__) && defined(__CET__) && defined(__has_include) # if __has_include(<cet.h>) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 5d87e32..2d9433f 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -3092,8 +3092,12 @@ libc_support_library( libc_support_library( name = "__support_sincosf_utils", - hdrs = ["src/__support/math/sincosf_utils.h"], + hdrs = [ + "src/__support/math/sincosf_utils.h", + "src/__support/math/sincosf_float_eval.h", + ], deps = [ + ":__support_fputil_double_double", ":__support_fputil_fp_bits", ":__support_fputil_polyeval", ":__support_range_reduction",
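The OpenMP map-flag rework in this patch hinges on two small idioms: the (value & flag) == flag containment test behind mapTypeToBool, and the bit-by-bit translation in convertClauseMapFlags that deliberately drops dialect-only flags when lowering to LLVM-IR. The self-contained C++ sketch below illustrates both in isolation; DialectFlags, RuntimeFlags, hasFlag, and toRuntime are hypothetical stand-ins for omp::ClauseMapFlags, llvm::omp::OpenMPOffloadMappingFlags, and the helpers in the patch, not the real MLIR/LLVM API.

// Standalone illustration of the flag idioms used above. Bit positions for
// to/from/always mirror the ClauseMapFlags definitions; everything else is
// illustrative only.
#include <cassert>
#include <cstdint>

enum class DialectFlags : uint32_t {
  none = 0,
  to = 1u << 1,
  from = 1u << 2,
  always = 1u << 3,
  ref_ptee = 1u << 17, // dialect-only; has no runtime counterpart
};

enum class RuntimeFlags : uint64_t {
  OMP_MAP_NONE = 0x0,
  OMP_MAP_TO = 0x1,
  OMP_MAP_FROM = 0x2,
  OMP_MAP_ALWAYS = 0x4,
};

// Scoped enums get no bitwise operators by default; MLIR generates these for
// bit-enum attributes, so we define the ones we need by hand.
constexpr DialectFlags operator|(DialectFlags a, DialectFlags b) {
  return DialectFlags(uint32_t(a) | uint32_t(b));
}
constexpr DialectFlags operator&(DialectFlags a, DialectFlags b) {
  return DialectFlags(uint32_t(a) & uint32_t(b));
}
constexpr RuntimeFlags operator|(RuntimeFlags a, RuntimeFlags b) {
  return RuntimeFlags(uint64_t(a) | uint64_t(b));
}

// Containment test in the style of mapTypeToBool(): true only when *every*
// bit of `flag` is present in `value`, not merely when the AND is nonzero.
constexpr bool hasFlag(DialectFlags value, DialectFlags flag) {
  return (value & flag) == flag;
}

// Bit-by-bit translation in the style of convertClauseMapFlags(): flags with
// no runtime meaning (e.g. ref_ptee) simply fall through and are dropped.
constexpr RuntimeFlags toRuntime(DialectFlags f) {
  RuntimeFlags out = RuntimeFlags::OMP_MAP_NONE;
  if (hasFlag(f, DialectFlags::to))
    out = out | RuntimeFlags::OMP_MAP_TO;
  if (hasFlag(f, DialectFlags::from))
    out = out | RuntimeFlags::OMP_MAP_FROM;
  if (hasFlag(f, DialectFlags::always))
    out = out | RuntimeFlags::OMP_MAP_ALWAYS;
  return out;
}

int main() {
  DialectFlags m =
      DialectFlags::to | DialectFlags::from | DialectFlags::ref_ptee;
  // Both halves of "tofrom" are set...
  assert(hasFlag(m, DialectFlags::to | DialectFlags::from));
  // ...and the dialect-only ref_ptee bit is discarded on translation.
  assert(toRuntime(m) ==
         (RuntimeFlags::OMP_MAP_TO | RuntimeFlags::OMP_MAP_FROM));
  return 0;
}

The equality comparison (rather than a plain nonzero-AND test) matters for multi-bit queries: hasFlag(m, to | from) is true only when both bits are set, whereas a nonzero test would also fire when just one of the two is present.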