aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/premerge.yaml2
-rw-r--r--clang/include/clang/CIR/Dialect/IR/CIROps.td73
-rw-r--r--clang/include/clang/CIR/MissingFeatures.h7
-rw-r--r--clang/include/clang/Sema/Sema.h2
-rw-r--r--clang/include/clang/Sema/SemaBase.h9
-rw-r--r--clang/lib/AST/ByteCode/Compiler.cpp102
-rw-r--r--clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp74
-rw-r--r--clang/lib/Basic/Targets.cpp4
-rw-r--r--clang/lib/Basic/Targets/ARM.cpp3
-rw-r--r--clang/lib/Basic/Targets/RISCV.cpp5
-rw-r--r--clang/lib/Basic/Targets/RISCV.h6
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenAtomic.cpp143
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp95
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp814
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp7
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCXXABI.h25
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCleanup.cpp227
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCleanup.h112
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenException.cpp151
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExpr.cpp20
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp3
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp73
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp5
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenFunction.cpp43
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenFunction.h70
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp75
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenModule.cpp13
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenStmt.cpp121
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenTypeCache.h10
-rw-r--r--clang/lib/CIR/CodeGen/CMakeLists.txt3
-rw-r--r--clang/lib/CIR/CodeGen/EHScopeStack.h90
-rw-r--r--clang/lib/CIR/Dialect/IR/CIRDialect.cpp15
-rw-r--r--clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp94
-rw-r--r--clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp141
-rw-r--r--clang/lib/Driver/ToolChains/Arch/ARM.cpp5
-rw-r--r--clang/lib/Format/ContinuationIndenter.cpp11
-rw-r--r--clang/lib/Sema/Sema.cpp11
-rw-r--r--clang/lib/Sema/SemaBase.cpp17
-rw-r--r--clang/lib/Sema/SemaOverload.cpp13
-rw-r--r--clang/lib/Sema/SemaRISCV.cpp16
-rw-r--r--clang/test/CIR/CodeGen/atomic.c523
-rw-r--r--clang/test/CIR/CodeGen/builtin_inline.c91
-rw-r--r--clang/test/CIR/CodeGen/dtors.cpp12
-rw-r--r--clang/test/CIR/CodeGen/lambda.cpp24
-rw-r--r--clang/test/CIR/CodeGen/new.cpp121
-rw-r--r--clang/test/CIR/CodeGen/statement-exprs.c10
-rw-r--r--clang/test/CIR/CodeGen/struct.cpp64
-rw-r--r--clang/test/CIR/CodeGen/try-catch.cpp87
-rw-r--r--clang/test/CIR/CodeGen/vla.c59
-rw-r--r--clang/test/CIR/IR/invalid-atomic.cir7
-rw-r--r--clang/test/Driver/arm-abi.c2
-rw-r--r--clang/test/Driver/fuchsia.c9
-rw-r--r--clang/test/Preprocessor/riscv-atomics.c24
-rw-r--r--clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp161
-rw-r--r--clang/unittests/Format/FormatTestComments.cpp19
-rw-r--r--compiler-rt/cmake/builtin-config-ix.cmake24
-rw-r--r--compiler-rt/cmake/config-ix.cmake18
-rw-r--r--compiler-rt/lib/asan/asan_rtl_x86_64.S1
-rw-r--r--compiler-rt/lib/builtins/assembly.h4
-rw-r--r--compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S1
-rw-r--r--compiler-rt/lib/orc/elfnix_tls.x86-64.S1
-rw-r--r--compiler-rt/lib/orc/sysv_reenter.x86-64.S1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S2
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S2
-rw-r--r--flang-rt/lib/cuda/kernel.cpp28
-rw-r--r--flang/docs/FortranLLVMTestSuite.md2
-rw-r--r--flang/include/flang/Lower/DirectivesCommon.h1
-rw-r--r--flang/include/flang/Semantics/semantics.h1
-rw-r--r--flang/include/flang/Utils/OpenMP.h5
-rw-r--r--flang/lib/Lower/OpenACC.cpp229
-rw-r--r--flang/lib/Lower/OpenMP/ClauseProcessor.cpp56
-rw-r--r--flang/lib/Lower/OpenMP/ClauseProcessor.h3
-rw-r--r--flang/lib/Lower/OpenMP/Clauses.cpp2
-rw-r--r--flang/lib/Lower/OpenMP/OpenMP.cpp46
-rw-r--r--flang/lib/Lower/OpenMP/Utils.cpp19
-rw-r--r--flang/lib/Lower/OpenMP/Utils.h2
-rw-r--r--flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp11
-rw-r--r--flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp15
-rw-r--r--flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp32
-rw-r--r--flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp62
-rw-r--r--flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp7
-rw-r--r--flang/lib/Optimizer/Transforms/CUFOpConversion.cpp4
-rw-r--r--flang/lib/Parser/openacc-parsers.cpp71
-rw-r--r--flang/lib/Parser/prescan.cpp26
-rw-r--r--flang/lib/Parser/prescan.h12
-rw-r--r--flang/lib/Semantics/check-omp-structure.h1
-rw-r--r--flang/lib/Semantics/resolve-directives.cpp82
-rw-r--r--flang/lib/Semantics/resolve-names.cpp44
-rw-r--r--flang/lib/Semantics/semantics.cpp9
-rw-r--r--flang/lib/Utils/OpenMP.cpp18
-rw-r--r--flang/test/Fir/CUDA/cuda-global-addr.mlir20
-rw-r--r--flang/test/Fir/OpenACC/openacc-type-categories-class.f902
-rw-r--r--flang/test/Fir/convert-to-llvm-openmp-and-fir.fir8
-rw-r--r--flang/test/Lower/OpenACC/acc-declare-common-in-function.f9040
-rw-r--r--flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f906
-rw-r--r--flang/test/Lower/OpenMP/common-block-map.f902
-rw-r--r--flang/test/Lower/OpenMP/declare-mapper.f902
-rw-r--r--flang/test/Lower/OpenMP/defaultmap.f906
-rw-r--r--flang/test/Lower/OpenMP/has_device_addr-mapinfo.f904
-rw-r--r--flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f902
-rw-r--r--flang/test/Lower/OpenMP/optional-argument-map-2.f902
-rw-r--r--flang/test/Lower/OpenMP/target.f9012
-rw-r--r--flang/test/Preprocessing/bug164470.cuf6
-rw-r--r--flang/test/Semantics/OpenACC/bug1583.f9023
-rw-r--r--flang/test/Semantics/bug164303.f9031
-rw-r--r--flang/test/Transforms/DoConcurrent/map_shape_info.f908
-rw-r--r--flang/test/Transforms/DoConcurrent/non_reference_to_device.f902
-rw-r--r--flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir8
-rw-r--r--flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir8
-rw-r--r--flang/test/Transforms/omp-map-info-finalization.fir8
-rw-r--r--libc/config/linux/aarch64/entrypoints.txt1
-rw-r--r--libc/config/linux/riscv/entrypoints.txt1
-rw-r--r--libc/config/linux/x86_64/entrypoints.txt6
-rw-r--r--libc/config/linux/x86_64/headers.txt1
-rw-r--r--libc/docs/dev/undefined_behavior.rst7
-rw-r--r--libc/include/CMakeLists.txt8
-rw-r--r--libc/include/arpa/inet.yaml10
-rw-r--r--libc/include/llvm-libc-types/CMakeLists.txt1
-rw-r--r--libc/include/llvm-libc-types/nl_catd.h14
-rw-r--r--libc/include/nl_types.yaml31
-rw-r--r--libc/src/CMakeLists.txt1
-rw-r--r--libc/src/__support/FPUtil/double_double.h5
-rw-r--r--libc/src/__support/math/CMakeLists.txt1
-rw-r--r--libc/src/__support/math/cosf.h30
-rw-r--r--libc/src/__support/math/sincosf_float_eval.h223
-rw-r--r--libc/src/arpa/inet/CMakeLists.txt13
-rw-r--r--libc/src/arpa/inet/inet_aton.cpp57
-rw-r--r--libc/src/arpa/inet/inet_aton.h21
-rw-r--r--libc/src/math/generic/sinf.cpp24
-rw-r--r--libc/src/nl_types/CMakeLists.txt31
-rw-r--r--libc/src/nl_types/catclose.cpp22
-rw-r--r--libc/src/nl_types/catclose.h21
-rw-r--r--libc/src/nl_types/catgets.cpp25
-rw-r--r--libc/src/nl_types/catgets.h22
-rw-r--r--libc/src/nl_types/catopen.cpp26
-rw-r--r--libc/src/nl_types/catopen.h21
-rw-r--r--libc/test/src/CMakeLists.txt1
-rw-r--r--libc/test/src/arpa/inet/CMakeLists.txt11
-rw-r--r--libc/test/src/arpa/inet/inet_aton_test.cpp92
-rw-r--r--libc/test/src/math/CMakeLists.txt28
-rw-r--r--libc/test/src/math/cosf_float_test.cpp35
-rw-r--r--libc/test/src/math/exhaustive/CMakeLists.txt30
-rw-r--r--libc/test/src/math/exhaustive/cosf_float_test.cpp44
-rw-r--r--libc/test/src/math/exhaustive/exhaustive_test.h13
-rw-r--r--libc/test/src/math/exhaustive/sinf_float_test.cpp47
-rw-r--r--libc/test/src/math/sinf_float_test.cpp35
-rw-r--r--libc/test/src/nl_types/CMakeLists.txt14
-rw-r--r--libc/test/src/nl_types/nl_types_test.cpp33
-rw-r--r--libunwind/src/UnwindRegistersRestore.S3
-rw-r--r--libunwind/src/UnwindRegistersSave.S2
-rw-r--r--lldb/docs/resources/lldbgdbremote.md7
-rw-r--r--lldb/include/lldb/Symbol/DeclVendor.h1
-rw-r--r--lldb/include/lldb/Target/Process.h22
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt1
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp32
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.cpp31
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.h43
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp10
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp2
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h5
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp23
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp122
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h4
-rw-r--r--lldb/source/Target/Process.cpp43
-rw-r--r--lldb/test/API/CMakeLists.txt7
-rw-r--r--lldb/unittests/Target/MemoryTest.cpp142
-rw-r--r--llvm/docs/CommandGuide/llvm-dwarfdump.rst2
-rw-r--r--llvm/include/llvm/ADT/Twine.h10
-rw-r--r--llvm/include/llvm/CodeGen/MIR2Vec.h113
-rw-r--r--llvm/include/llvm/CodeGen/Passes.h4
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h6
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td8
-rw-r--r--llvm/include/llvm/IR/RuntimeLibcalls.h2
-rw-r--r--llvm/include/llvm/InitializePasses.h1
-rw-r--r--llvm/include/llvm/TargetParser/Triple.h3
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp92
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp2
-rw-r--r--llvm/lib/CodeGen/CodeGen.cpp1
-rw-r--r--llvm/lib/CodeGen/MIR2Vec.cpp166
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td8
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp5
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.h1
-rw-r--r--llvm/lib/Target/DirectX/DXILPrepare.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp40
-rw-r--r--llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp229
-rw-r--r--llvm/lib/Target/RISCV/RISCVFeatures.td3
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp25
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h1
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoA.td80
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp50
-rw-r--r--llvm/lib/TargetParser/ARMTargetParser.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp30
-rw-r--r--llvm/lib/Transforms/Instrumentation/AllocToken.cpp73
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp34
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h26
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUtils.cpp27
-rw-r--r--llvm/test/CodeGen/AMDGPU/add-max.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/bf16.ll26
-rw-r--r--llvm/test/CodeGen/ARM/llround-conv.ll9
-rw-r--r--llvm/test/CodeGen/ARM/lround-conv.ll43
-rw-r--r--llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll19
-rw-r--r--llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll33
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json22
-rw-r--r--llvm/test/CodeGen/MIR2Vec/if-else.mir144
-rw-r--r--llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir76
-rw-r--r--llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll10
-rw-r--r--llvm/test/CodeGen/RISCV/atomic-fence.ll4
-rw-r--r--llvm/test/CodeGen/RISCV/atomic-load-store.ll406
-rw-r--r--llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll102
-rw-r--r--llvm/test/CodeGen/RISCV/atomic-rmw.ll9014
-rw-r--r--llvm/test/CodeGen/RISCV/atomic-signext.ll2644
-rw-r--r--llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll8
-rw-r--r--llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll5
-rw-r--r--llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll43
-rw-r--r--llvm/test/CodeGen/X86/bitcnt-big-integer.ll3021
-rw-r--r--llvm/test/Instrumentation/AllocToken/intrinsic.ll32
-rw-r--r--llvm/test/Instrumentation/AllocToken/intrinsic32.ll32
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s52
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s52
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt52
-rw-r--r--llvm/test/Transforms/InstCombine/select-and-or.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/select-safe-transforms.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/select_with_identical_phi.ll243
-rw-r--r--llvm/tools/llc/llc.cpp15
-rw-r--r--llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp7
-rw-r--r--llvm/unittests/CAS/OnDiskGraphDBTest.cpp2
-rw-r--r--llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp6
-rw-r--r--llvm/unittests/CodeGen/MIR2VecTest.cpp299
-rw-r--r--llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp2
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp10
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPlanTest.cpp26
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp16
-rw-r--r--llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn1
-rw-r--r--mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td54
-rw-r--r--mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td2
-rw-r--r--mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp170
-rw-r--r--mlir/lib/IR/Diagnostics.cpp6
-rw-r--r--mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp64
-rw-r--r--mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir12
-rw-r--r--mlir/test/Dialect/OpenMP/ops.mlir80
-rw-r--r--openmp/runtime/src/z_Linux_asm.S1
-rw-r--r--utils/bazel/llvm-project-overlay/libc/BUILD.bazel6
247 files changed, 23203 insertions, 1276 deletions
diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml
index 951fc16..6303a11 100644
--- a/.github/workflows/premerge.yaml
+++ b/.github/workflows/premerge.yaml
@@ -193,7 +193,7 @@ jobs:
uses: llvm/actions/install-ninja@main
- name: Build and Test
run: |
- source <(git diff --name-only HEAD~2..HEAD | python3 .ci/compute_projects.py)
+ source <(git diff --name-only HEAD~1...HEAD | python3 .ci/compute_projects.py)
if [[ "${projects_to_build}" == "" ]]; then
echo "No projects to build"
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index b1e6ba2..86d09d7 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4457,6 +4457,79 @@ def CIR_TryOp : CIR_Op<"try",[
// Atomic operations
//===----------------------------------------------------------------------===//
+def CIR_AtomicFetchKind : CIR_I32EnumAttr<
+ "AtomicFetchKind", "Binary opcode for atomic fetch-and-update operations", [
+ I32EnumAttrCase<"Add", 0, "add">,
+ I32EnumAttrCase<"Sub", 1, "sub">,
+ I32EnumAttrCase<"And", 2, "and">,
+ I32EnumAttrCase<"Xor", 3, "xor">,
+ I32EnumAttrCase<"Or", 4, "or">,
+ I32EnumAttrCase<"Nand", 5, "nand">,
+ I32EnumAttrCase<"Max", 6, "max">,
+ I32EnumAttrCase<"Min", 7, "min">
+]>;
+
+def CIR_AtomicFetchOp : CIR_Op<"atomic.fetch", [
+ AllTypesMatch<["result", "val"]>,
+ TypesMatchWith<"type of 'val' must match the pointee type of 'ptr'",
+ "ptr", "val", "mlir::cast<cir::PointerType>($_self).getPointee()">
+]> {
+ let summary = "Atomic fetch-and-update operation";
+ let description = [{
+ C/C++ atomic fetch-and-update operation. This operation implements the C/C++
+ builtin functions `__atomic_<binop>_fetch`, `__atomic_fetch_<binop>`, and
+ `__c11_atomic_fetch_<binop>`, where `<binop>` is one of the following binary
+ opcodes: `add`, `sub`, `and`, `xor`, `or`, `nand`, `max`, and `min`.
+
+ This operation takes 2 arguments: a pointer `ptr` and a value `val`. The
+ type of `val` must match the pointee type of `ptr`. If the binary operation
+ is `add`, `sub`, `max`, or `min`, the type of `val` may either be an integer
+ type or a floating-point type. Otherwise, `val` must be an integer.
+
+ This operation atomically loads the value from `ptr`, performs the binary
+ operation as indicated by `binop` on the loaded value and `val`, and stores
+ the result back to `ptr`. If the `fetch_first` flag is present, the result
+ of this operation is the old value loaded from `ptr` before the binary
+ operation. Otherwise, the result of this operation is the result of the
+ binary operation.
+
+ Example:
+ %res = cir.atomic.fetch add seq_cst %ptr, %val
+ : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ }];
+ let results = (outs CIR_AnyIntOrFloatType:$result);
+ let arguments = (ins
+ Arg<CIR_PtrToIntOrFloatType, "", [MemRead, MemWrite]>:$ptr,
+ CIR_AnyIntOrFloatType:$val,
+ CIR_AtomicFetchKind:$binop,
+ Arg<CIR_MemOrder, "memory order">:$mem_order,
+ UnitAttr:$is_volatile,
+ UnitAttr:$fetch_first
+ );
+
+ let assemblyFormat = [{
+ $binop $mem_order
+ (`fetch_first` $fetch_first^)?
+ $ptr `,` $val
+ (`volatile` $is_volatile^)?
+ `:` `(` qualified(type($ptr)) `,` qualified(type($val)) `)`
+ `->` type($result) attr-dict
+ }];
+
+ let hasVerifier = 1;
+
+ let extraLLVMLoweringPatternDecl = [{
+ mlir::Value buildPostOp(cir::AtomicFetchOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter,
+ mlir::Value rmwVal, bool isInt) const;
+
+ mlir::Value buildMinMaxPostOp(cir::AtomicFetchOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter,
+ mlir::Value rmwVal, bool isInt,
+ bool isSigned) const;
+ }];
+}
+
def CIR_AtomicXchgOp : CIR_Op<"atomic.xchg", [
AllTypesMatch<["result", "val"]>,
TypesMatchWith<"type of 'val' must match the pointee type of 'ptr'",
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 01da626..598e826a 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -119,6 +119,7 @@ struct MissingFeatures {
static bool opCallLandingPad() { return false; }
static bool opCallContinueBlock() { return false; }
static bool opCallChain() { return false; }
+ static bool opCallExceptionAttr() { return false; }
// CXXNewExpr
static bool exprNewNullCheck() { return false; }
@@ -218,6 +219,9 @@ struct MissingFeatures {
static bool checkBitfieldClipping() { return false; }
static bool cirgenABIInfo() { return false; }
static bool cleanupAfterErrorDiags() { return false; }
+ static bool cleanupAppendInsts() { return false; }
+ static bool cleanupBranchThrough() { return false; }
+ static bool cleanupIndexAndBIAdjustment() { return false; }
static bool cleanupsToDeactivate() { return false; }
static bool constEmitterAggILE() { return false; }
static bool constEmitterArrayILE() { return false; }
@@ -238,6 +242,7 @@ struct MissingFeatures {
static bool deleteArray() { return false; }
static bool devirtualizeMemberFunction() { return false; }
static bool ehCleanupFlags() { return false; }
+ static bool ehCleanupHasPrebranchedFallthrough() { return false; }
static bool ehCleanupScope() { return false; }
static bool ehCleanupScopeRequiresEHCleanup() { return false; }
static bool ehCleanupBranchFixups() { return false; }
@@ -256,6 +261,7 @@ struct MissingFeatures {
static bool generateDebugInfo() { return false; }
static bool globalViewIndices() { return false; }
static bool globalViewIntLowering() { return false; }
+ static bool handleBuiltinICEArguments() { return false; }
static bool hip() { return false; }
static bool incrementProfileCounter() { return false; }
static bool innermostEHScope() { return false; }
@@ -294,6 +300,7 @@ struct MissingFeatures {
static bool setNonGC() { return false; }
static bool setObjCGCLValueClass() { return false; }
static bool setTargetAttributes() { return false; }
+ static bool simplifyCleanupEntry() { return false; }
static bool sourceLanguageCases() { return false; }
static bool stackBase() { return false; }
static bool stackSaveOp() { return false; }
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index cb21335..87b96c2 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10021,7 +10021,7 @@ public:
public:
DeferDiagsRAII(Sema &S, bool DeferDiags)
: S(S), SavedDeferDiags(S.DeferDiags) {
- S.DeferDiags = DeferDiags;
+ S.DeferDiags = SavedDeferDiags || DeferDiags;
}
~DeferDiagsRAII() { S.DeferDiags = SavedDeferDiags; }
};
diff --git a/clang/include/clang/Sema/SemaBase.h b/clang/include/clang/Sema/SemaBase.h
index 550f530..8e43b0b 100644
--- a/clang/include/clang/Sema/SemaBase.h
+++ b/clang/include/clang/Sema/SemaBase.h
@@ -212,16 +212,13 @@ public:
};
/// Emit a diagnostic.
- SemaDiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID,
- bool DeferHint = false);
+ SemaDiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID);
/// Emit a partial diagnostic.
- SemaDiagnosticBuilder Diag(SourceLocation Loc, const PartialDiagnostic &PD,
- bool DeferHint = false);
+ SemaDiagnosticBuilder Diag(SourceLocation Loc, const PartialDiagnostic &PD);
/// Emit a compatibility diagnostic.
- SemaDiagnosticBuilder DiagCompat(SourceLocation Loc, unsigned CompatDiagId,
- bool DeferHint = false);
+ SemaDiagnosticBuilder DiagCompat(SourceLocation Loc, unsigned CompatDiagId);
/// Build a partial diagnostic.
PartialDiagnostic PDiag(unsigned DiagID = 0);
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index f7731f0..6b98927 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -4841,46 +4841,39 @@ Compiler<Emitter>::visitVarDecl(const VarDecl *VD, const Expr *Init,
return !NeedsOp || this->emitCheckDecl(VD, VD);
};
- auto initGlobal = [&](unsigned GlobalIndex) -> bool {
- assert(Init);
-
- if (VarT) {
- if (!this->visit(Init))
- return checkDecl() && false;
-
- return checkDecl() && this->emitInitGlobal(*VarT, GlobalIndex, VD);
- }
-
- if (!checkDecl())
- return false;
-
- if (!this->emitGetPtrGlobal(GlobalIndex, Init))
- return false;
-
- if (!visitInitializer(Init))
- return false;
-
- return this->emitFinishInitGlobal(Init);
- };
-
DeclScope<Emitter> LocalScope(this, VD);
- // We've already seen and initialized this global.
- if (UnsignedOrNone GlobalIndex = P.getGlobal(VD)) {
+ UnsignedOrNone GlobalIndex = P.getGlobal(VD);
+ if (GlobalIndex) {
+ // We've already seen and initialized this global.
if (P.getPtrGlobal(*GlobalIndex).isInitialized())
return checkDecl();
-
// The previous attempt at initialization might've been unsuccessful,
// so let's try this one.
- return !Init || (checkDecl() && initGlobal(*GlobalIndex));
+ } else if ((GlobalIndex = P.createGlobal(VD, Init))) {
+ } else {
+ return false;
}
+ if (!Init)
+ return true;
- UnsignedOrNone GlobalIndex = P.createGlobal(VD, Init);
+ if (!checkDecl())
+ return false;
- if (!GlobalIndex)
+ if (VarT) {
+ if (!this->visit(Init))
+ return false;
+
+ return this->emitInitGlobal(*VarT, *GlobalIndex, VD);
+ }
+
+ if (!this->emitGetPtrGlobal(*GlobalIndex, Init))
+ return false;
+
+ if (!visitInitializer(Init))
return false;
- return !Init || (checkDecl() && initGlobal(*GlobalIndex));
+ return this->emitFinishInitGlobal(Init);
}
// Local variables.
InitLinkScope<Emitter> ILS(this, InitLink::Decl(VD));
@@ -4890,36 +4883,37 @@ Compiler<Emitter>::visitVarDecl(const VarDecl *VD, const Expr *Init,
VD, *VarT, VD->getType().isConstQualified(),
VD->getType().isVolatileQualified(), nullptr, ScopeKind::Block,
IsConstexprUnknown);
- if (Init) {
- // If this is a toplevel declaration, create a scope for the
- // initializer.
- if (Toplevel) {
- LocalScope<Emitter> Scope(this);
- if (!this->visit(Init))
- return false;
- return this->emitSetLocal(*VarT, Offset, VD) && Scope.destroyLocals();
- }
- if (!this->visit(Init))
- return false;
- return this->emitSetLocal(*VarT, Offset, VD);
- }
- } else {
- if (UnsignedOrNone Offset = this->allocateLocal(
- VD, VD->getType(), nullptr, ScopeKind::Block, IsConstexprUnknown)) {
- if (!Init)
- return true;
- if (!this->emitGetPtrLocal(*Offset, Init))
- return false;
+ if (!Init)
+ return true;
- if (!visitInitializer(Init))
+ // If this is a toplevel declaration, create a scope for the
+ // initializer.
+ if (Toplevel) {
+ LocalScope<Emitter> Scope(this);
+ if (!this->visit(Init))
return false;
-
- return this->emitFinishInitPop(Init);
+ return this->emitSetLocal(*VarT, Offset, VD) && Scope.destroyLocals();
}
- return false;
+ if (!this->visit(Init))
+ return false;
+ return this->emitSetLocal(*VarT, Offset, VD);
}
- return true;
+ // Local composite variables.
+ if (UnsignedOrNone Offset = this->allocateLocal(
+ VD, VD->getType(), nullptr, ScopeKind::Block, IsConstexprUnknown)) {
+ if (!Init)
+ return true;
+
+ if (!this->emitGetPtrLocal(*Offset, Init))
+ return false;
+
+ if (!visitInitializer(Init))
+ return false;
+
+ return this->emitFinishInitPop(Init);
+ }
+ return false;
}
template <class Emitter>
diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp
index c88a470..f068be5 100644
--- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp
@@ -24,6 +24,7 @@
#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/MatchSwitch.h"
+#include "clang/Analysis/FlowSensitive/RecordOps.h"
#include "clang/Analysis/FlowSensitive/StorageLocation.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Basic/LLVM.h"
@@ -95,6 +96,18 @@ static QualType getStatusOrValueType(ClassTemplateSpecializationDecl *TRD) {
return TRD->getTemplateArgs().get(0).getAsType();
}
+static auto ofClassStatus() {
+ using namespace ::clang::ast_matchers; // NOLINT: Too many names
+ return ofClass(hasName("::absl::Status"));
+}
+
+static auto isStatusMemberCallWithName(llvm::StringRef member_name) {
+ using namespace ::clang::ast_matchers; // NOLINT: Too many names
+ return cxxMemberCallExpr(
+ on(expr(unless(cxxThisExpr()))),
+ callee(cxxMethodDecl(hasName(member_name), ofClassStatus())));
+}
+
static auto isStatusOrMemberCallWithName(llvm::StringRef member_name) {
using namespace ::clang::ast_matchers; // NOLINT: Too many names
return cxxMemberCallExpr(
@@ -244,6 +257,61 @@ static void transferStatusOrOkCall(const CXXMemberCallExpr *Expr,
State.Env.setValue(*Expr, OkVal);
}
+static void transferStatusCall(const CXXMemberCallExpr *Expr,
+ const MatchFinder::MatchResult &,
+ LatticeTransferState &State) {
+ RecordStorageLocation *StatusOrLoc =
+ getImplicitObjectLocation(*Expr, State.Env);
+ if (StatusOrLoc == nullptr)
+ return;
+
+ RecordStorageLocation &StatusLoc = locForStatus(*StatusOrLoc);
+
+ if (State.Env.getValue(locForOk(StatusLoc)) == nullptr)
+ initializeStatusOr(*StatusOrLoc, State.Env);
+
+ if (Expr->isPRValue())
+ copyRecord(StatusLoc, State.Env.getResultObjectLocation(*Expr), State.Env);
+ else
+ State.Env.setStorageLocation(*Expr, StatusLoc);
+}
+
+static void transferStatusOkCall(const CXXMemberCallExpr *Expr,
+ const MatchFinder::MatchResult &,
+ LatticeTransferState &State) {
+ RecordStorageLocation *StatusLoc =
+ getImplicitObjectLocation(*Expr, State.Env);
+ if (StatusLoc == nullptr)
+ return;
+
+ if (Value *Val = State.Env.getValue(locForOk(*StatusLoc)))
+ State.Env.setValue(*Expr, *Val);
+}
+
+static void transferStatusUpdateCall(const CXXMemberCallExpr *Expr,
+ const MatchFinder::MatchResult &,
+ LatticeTransferState &State) {
+ // S.Update(OtherS) sets S to the error code of OtherS if it is OK,
+ // otherwise does nothing.
+ assert(Expr->getNumArgs() == 1);
+ auto *Arg = Expr->getArg(0);
+ RecordStorageLocation *ArgRecord =
+ Arg->isPRValue() ? &State.Env.getResultObjectLocation(*Arg)
+ : State.Env.get<RecordStorageLocation>(*Arg);
+ RecordStorageLocation *ThisLoc = getImplicitObjectLocation(*Expr, State.Env);
+ if (ThisLoc == nullptr || ArgRecord == nullptr)
+ return;
+
+ auto &ThisOkVal = valForOk(*ThisLoc, State.Env);
+ auto &ArgOkVal = valForOk(*ArgRecord, State.Env);
+ auto &A = State.Env.arena();
+ auto &NewVal = State.Env.makeAtomicBoolValue();
+ State.Env.assume(A.makeImplies(A.makeNot(ThisOkVal.formula()),
+ A.makeNot(NewVal.formula())));
+ State.Env.assume(A.makeImplies(NewVal.formula(), ArgOkVal.formula()));
+ State.Env.setValue(locForOk(*ThisLoc), NewVal);
+}
+
CFGMatchSwitch<LatticeTransferState>
buildTransferMatchSwitch(ASTContext &Ctx,
CFGMatchSwitchBuilder<LatticeTransferState> Builder) {
@@ -251,6 +319,12 @@ buildTransferMatchSwitch(ASTContext &Ctx,
return std::move(Builder)
.CaseOfCFGStmt<CXXMemberCallExpr>(isStatusOrMemberCallWithName("ok"),
transferStatusOrOkCall)
+ .CaseOfCFGStmt<CXXMemberCallExpr>(isStatusOrMemberCallWithName("status"),
+ transferStatusCall)
+ .CaseOfCFGStmt<CXXMemberCallExpr>(isStatusMemberCallWithName("ok"),
+ transferStatusOkCall)
+ .CaseOfCFGStmt<CXXMemberCallExpr>(isStatusMemberCallWithName("Update"),
+ transferStatusUpdateCall)
.Build();
}
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index b7e8bad..f39c698 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -222,6 +222,8 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple,
return std::make_unique<OHOSTargetInfo<ARMleTargetInfo>>(Triple, Opts);
case llvm::Triple::FreeBSD:
return std::make_unique<FreeBSDTargetInfo<ARMleTargetInfo>>(Triple, Opts);
+ case llvm::Triple::Fuchsia:
+ return std::make_unique<FuchsiaTargetInfo<ARMleTargetInfo>>(Triple, Opts);
case llvm::Triple::NetBSD:
return std::make_unique<NetBSDTargetInfo<ARMleTargetInfo>>(Triple, Opts);
case llvm::Triple::OpenBSD:
@@ -254,6 +256,8 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple,
return std::make_unique<AppleMachOARMTargetInfo>(Triple, Opts);
switch (os) {
+ case llvm::Triple::Fuchsia:
+ return std::make_unique<FuchsiaTargetInfo<ARMbeTargetInfo>>(Triple, Opts);
case llvm::Triple::Linux:
return std::make_unique<LinuxTargetInfo<ARMbeTargetInfo>>(Triple, Opts);
case llvm::Triple::NetBSD:
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 3de17d2..d00a3a4 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -260,6 +260,7 @@ ARMTargetInfo::ARMTargetInfo(const llvm::Triple &Triple,
: TargetInfo(Triple), FPMath(FP_Default), IsAAPCS(true), LDREX(0),
HW_FP(0) {
bool IsFreeBSD = Triple.isOSFreeBSD();
+ bool IsFuchsia = Triple.isOSFuchsia();
bool IsOpenBSD = Triple.isOSOpenBSD();
bool IsNetBSD = Triple.isOSNetBSD();
bool IsHaiku = Triple.isOSHaiku();
@@ -332,7 +333,7 @@ ARMTargetInfo::ARMTargetInfo(const llvm::Triple &Triple,
default:
if (IsNetBSD)
setABI("apcs-gnu");
- else if (IsFreeBSD || IsOpenBSD || IsHaiku || IsOHOS)
+ else if (IsFreeBSD || IsFuchsia || IsOpenBSD || IsHaiku || IsOHOS)
setABI("aapcs-linux");
else
setABI("aapcs");
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 04da4e6..685925b 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -192,8 +192,11 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__riscv_muldiv");
}
- if (ISAInfo->hasExtension("a")) {
+ // The "a" extension is composed of "zalrsc" and "zaamo"
+ if (ISAInfo->hasExtension("a"))
Builder.defineMacro("__riscv_atomic");
+
+ if (ISAInfo->hasExtension("zalrsc")) {
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index d8b0e64..85fa4cc 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -195,7 +195,8 @@ public:
void setMaxAtomicWidth() override {
MaxAtomicPromoteWidth = 128;
- if (ISAInfo->hasExtension("a"))
+ // "a" implies "zalrsc" which is sufficient to inline atomics
+ if (ISAInfo->hasExtension("zalrsc"))
MaxAtomicInlineWidth = 32;
}
};
@@ -225,7 +226,8 @@ public:
void setMaxAtomicWidth() override {
MaxAtomicPromoteWidth = 128;
- if (ISAInfo->hasExtension("a"))
+ // "a" implies "zalrsc" which is sufficient to inline atomics
+ if (ISAInfo->hasExtension("zalrsc"))
MaxAtomicInlineWidth = 64;
}
};
diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
index 67ca60c..7db6e28 100644
--- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
@@ -346,6 +346,8 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
CIRGenBuilderTy &builder = cgf.getBuilder();
mlir::Location loc = cgf.getLoc(expr->getSourceRange());
auto orderAttr = cir::MemOrderAttr::get(builder.getContext(), order);
+ cir::AtomicFetchKindAttr fetchAttr;
+ bool fetchFirst = true;
switch (expr->getOp()) {
case AtomicExpr::AO__c11_atomic_init:
@@ -407,6 +409,86 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
opName = cir::AtomicXchgOp::getOperationName();
break;
+ case AtomicExpr::AO__atomic_add_fetch:
+ fetchFirst = false;
+ [[fallthrough]];
+ case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__atomic_fetch_add:
+ opName = cir::AtomicFetchOp::getOperationName();
+ fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+ cir::AtomicFetchKind::Add);
+ break;
+
+ case AtomicExpr::AO__atomic_sub_fetch:
+ fetchFirst = false;
+ [[fallthrough]];
+ case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__atomic_fetch_sub:
+ opName = cir::AtomicFetchOp::getOperationName();
+ fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+ cir::AtomicFetchKind::Sub);
+ break;
+
+ case AtomicExpr::AO__atomic_min_fetch:
+ fetchFirst = false;
+ [[fallthrough]];
+ case AtomicExpr::AO__c11_atomic_fetch_min:
+ case AtomicExpr::AO__atomic_fetch_min:
+ opName = cir::AtomicFetchOp::getOperationName();
+ fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+ cir::AtomicFetchKind::Min);
+ break;
+
+ case AtomicExpr::AO__atomic_max_fetch:
+ fetchFirst = false;
+ [[fallthrough]];
+ case AtomicExpr::AO__c11_atomic_fetch_max:
+ case AtomicExpr::AO__atomic_fetch_max:
+ opName = cir::AtomicFetchOp::getOperationName();
+ fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+ cir::AtomicFetchKind::Max);
+ break;
+
+ case AtomicExpr::AO__atomic_and_fetch:
+ fetchFirst = false;
+ [[fallthrough]];
+ case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__atomic_fetch_and:
+ opName = cir::AtomicFetchOp::getOperationName();
+ fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+ cir::AtomicFetchKind::And);
+ break;
+
+ case AtomicExpr::AO__atomic_or_fetch:
+ fetchFirst = false;
+ [[fallthrough]];
+ case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__atomic_fetch_or:
+ opName = cir::AtomicFetchOp::getOperationName();
+ fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+ cir::AtomicFetchKind::Or);
+ break;
+
+ case AtomicExpr::AO__atomic_xor_fetch:
+ fetchFirst = false;
+ [[fallthrough]];
+ case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__atomic_fetch_xor:
+ opName = cir::AtomicFetchOp::getOperationName();
+ fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+ cir::AtomicFetchKind::Xor);
+ break;
+
+ case AtomicExpr::AO__atomic_nand_fetch:
+ fetchFirst = false;
+ [[fallthrough]];
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
+ case AtomicExpr::AO__atomic_fetch_nand:
+ opName = cir::AtomicFetchOp::getOperationName();
+ fetchAttr = cir::AtomicFetchKindAttr::get(builder.getContext(),
+ cir::AtomicFetchKind::Nand);
+ break;
+
case AtomicExpr::AO__atomic_test_and_set: {
auto op = cir::AtomicTestAndSetOp::create(
builder, loc, ptr.getPointer(), order,
@@ -450,74 +532,50 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
case AtomicExpr::AO__scoped_atomic_exchange_n:
case AtomicExpr::AO__scoped_atomic_exchange:
- case AtomicExpr::AO__atomic_add_fetch:
case AtomicExpr::AO__scoped_atomic_add_fetch:
- case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__hip_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
- case AtomicExpr::AO__atomic_fetch_add:
case AtomicExpr::AO__scoped_atomic_fetch_add:
- case AtomicExpr::AO__atomic_sub_fetch:
case AtomicExpr::AO__scoped_atomic_sub_fetch:
- case AtomicExpr::AO__c11_atomic_fetch_sub:
case AtomicExpr::AO__hip_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
- case AtomicExpr::AO__atomic_fetch_sub:
case AtomicExpr::AO__scoped_atomic_fetch_sub:
- case AtomicExpr::AO__atomic_min_fetch:
case AtomicExpr::AO__scoped_atomic_min_fetch:
- case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__hip_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
- case AtomicExpr::AO__atomic_fetch_min:
case AtomicExpr::AO__scoped_atomic_fetch_min:
- case AtomicExpr::AO__atomic_max_fetch:
case AtomicExpr::AO__scoped_atomic_max_fetch:
- case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
- case AtomicExpr::AO__atomic_fetch_max:
case AtomicExpr::AO__scoped_atomic_fetch_max:
- case AtomicExpr::AO__atomic_and_fetch:
case AtomicExpr::AO__scoped_atomic_and_fetch:
- case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_and:
- case AtomicExpr::AO__atomic_fetch_and:
case AtomicExpr::AO__scoped_atomic_fetch_and:
- case AtomicExpr::AO__atomic_or_fetch:
case AtomicExpr::AO__scoped_atomic_or_fetch:
- case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__hip_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
- case AtomicExpr::AO__atomic_fetch_or:
case AtomicExpr::AO__scoped_atomic_fetch_or:
- case AtomicExpr::AO__atomic_xor_fetch:
case AtomicExpr::AO__scoped_atomic_xor_fetch:
- case AtomicExpr::AO__c11_atomic_fetch_xor:
case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__opencl_atomic_fetch_xor:
- case AtomicExpr::AO__atomic_fetch_xor:
case AtomicExpr::AO__scoped_atomic_fetch_xor:
- case AtomicExpr::AO__atomic_nand_fetch:
case AtomicExpr::AO__scoped_atomic_nand_fetch:
- case AtomicExpr::AO__c11_atomic_fetch_nand:
- case AtomicExpr::AO__atomic_fetch_nand:
case AtomicExpr::AO__scoped_atomic_fetch_nand:
cgf.cgm.errorNYI(expr->getSourceRange(), "emitAtomicOp: expr op NYI");
return;
@@ -531,9 +589,13 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
mlir::Operation *rmwOp = builder.create(loc, builder.getStringAttr(opName),
atomicOperands, atomicResTys);
+ if (fetchAttr)
+ rmwOp->setAttr("binop", fetchAttr);
rmwOp->setAttr("mem_order", orderAttr);
if (expr->isVolatile())
rmwOp->setAttr("is_volatile", builder.getUnitAttr());
+ if (fetchFirst && opName == cir::AtomicFetchOp::getOperationName())
+ rmwOp->setAttr("fetch_first", builder.getUnitAttr());
mlir::Value result = rmwOp->getResult(0);
builder.createStore(loc, result, dest);
@@ -629,8 +691,41 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) {
isWeakExpr = e->getWeak();
break;
+ case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__c11_atomic_fetch_sub:
+ if (memTy->isPointerType()) {
+ cgm.errorNYI(e->getSourceRange(),
+ "atomic fetch-and-add and fetch-and-sub for pointers");
+ return RValue::get(nullptr);
+ }
+ [[fallthrough]];
+ case AtomicExpr::AO__atomic_fetch_add:
+ case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__atomic_fetch_sub:
+ case AtomicExpr::AO__atomic_add_fetch:
+ case AtomicExpr::AO__atomic_max_fetch:
+ case AtomicExpr::AO__atomic_min_fetch:
+ case AtomicExpr::AO__atomic_sub_fetch:
+ case AtomicExpr::AO__c11_atomic_fetch_max:
+ case AtomicExpr::AO__c11_atomic_fetch_min:
+ shouldCastToIntPtrTy = !memTy->isFloatingType();
+ [[fallthrough]];
+
+ case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__atomic_fetch_nand:
+ case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__atomic_fetch_xor:
+ case AtomicExpr::AO__atomic_and_fetch:
+ case AtomicExpr::AO__atomic_nand_fetch:
+ case AtomicExpr::AO__atomic_or_fetch:
+ case AtomicExpr::AO__atomic_xor_fetch:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_store_n:
+ case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
+ case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__c11_atomic_fetch_xor:
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__c11_atomic_store:
val1 = emitValToTemp(*this, e->getVal1());
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index ea31871..798e9d9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -463,12 +463,107 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
return emitLibraryCall(*this, fd, e,
cgm.getBuiltinLibFunction(fd, builtinID));
+ // Some target-specific builtins can have aggregate return values, e.g.
+ // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
+ // returnValue to be non-null, so that the target-specific emission code can
+ // always just emit into it.
+ cir::TypeEvaluationKind evalKind = getEvaluationKind(e->getType());
+ if (evalKind == cir::TEK_Aggregate && returnValue.isNull()) {
+ cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin");
+ return getUndefRValue(e->getType());
+ }
+
+ // Now see if we can emit a target-specific builtin.
+ if (mlir::Value v = emitTargetBuiltinExpr(builtinID, e, returnValue)) {
+ switch (evalKind) {
+ case cir::TEK_Scalar:
+ if (mlir::isa<cir::VoidType>(v.getType()))
+ return RValue::get(nullptr);
+ return RValue::get(v);
+ case cir::TEK_Aggregate:
+ cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin");
+ return getUndefRValue(e->getType());
+ case cir::TEK_Complex:
+ llvm_unreachable("No current target builtin returns complex");
+ }
+ llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
+ }
+
cgm.errorNYI(e->getSourceRange(),
std::string("unimplemented builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return getUndefRValue(e->getType());
}
+static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf,
+ unsigned builtinID,
+ const CallExpr *e,
+ ReturnValueSlot &returnValue,
+ llvm::Triple::ArchType arch) {
+ // When compiling in HipStdPar mode we have to be conservative in rejecting
+ // target specific features in the FE, and defer the possible error to the
+ // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
+ // referenced by an accelerator executable function, we emit an error.
+ // Returning nullptr here leads to the builtin being handled in
+ // EmitStdParUnsupportedBuiltin.
+ if (cgf->getLangOpts().HIPStdPar && cgf->getLangOpts().CUDAIsDevice &&
+ arch != cgf->getTarget().getTriple().getArch())
+ return {};
+
+ switch (arch) {
+ case llvm::Triple::arm:
+ case llvm::Triple::armeb:
+ case llvm::Triple::thumb:
+ case llvm::Triple::thumbeb:
+ case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_32:
+ case llvm::Triple::aarch64_be:
+ case llvm::Triple::bpfeb:
+ case llvm::Triple::bpfel:
+ // These are actually NYI, but that will be reported by emitBuiltinExpr.
+ // At this point, we don't even know that the builtin is target-specific.
+ return nullptr;
+
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ return cgf->emitX86BuiltinExpr(builtinID, e);
+
+ case llvm::Triple::ppc:
+ case llvm::Triple::ppcle:
+ case llvm::Triple::ppc64:
+ case llvm::Triple::ppc64le:
+ case llvm::Triple::r600:
+ case llvm::Triple::amdgcn:
+ case llvm::Triple::systemz:
+ case llvm::Triple::nvptx:
+ case llvm::Triple::nvptx64:
+ case llvm::Triple::wasm32:
+ case llvm::Triple::wasm64:
+ case llvm::Triple::hexagon:
+ case llvm::Triple::riscv32:
+ case llvm::Triple::riscv64:
+ // These are actually NYI, but that will be reported by emitBuiltinExpr.
+ // At this point, we don't even know that the builtin is target-specific.
+ return {};
+ default:
+ return {};
+ }
+}
+
+mlir::Value
+CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, const CallExpr *e,
+ ReturnValueSlot &returnValue) {
+ if (getContext().BuiltinInfo.isAuxBuiltinID(builtinID)) {
+ assert(getContext().getAuxTargetInfo() && "Missing aux target info");
+ return emitTargetArchBuiltinExpr(
+ this, getContext().BuiltinInfo.getAuxBuiltinID(builtinID), e,
+ returnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
+ }
+
+ return emitTargetArchBuiltinExpr(this, builtinID, e, returnValue,
+ getTarget().getTriple().getArch());
+}
+
/// Given a builtin id for a function like "__builtin_fabsf", return a Function*
/// for "fabsf".
cir::FuncOp CIRGenModule::getBuiltinLibFunction(const FunctionDecl *fd,
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
new file mode 100644
index 0000000..3c9c7ec
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -0,0 +1,814 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit x86/x86_64 Builtin calls as CIR or a function
+// call to be later resolved.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenFunction.h"
+#include "CIRGenModule.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/MissingFeatures.h"
+#include "llvm/IR/IntrinsicsX86.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
+mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
+ const CallExpr *e) {
+ if (builtinID == Builtin::BI__builtin_cpu_is) {
+ cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is");
+ return {};
+ }
+ if (builtinID == Builtin::BI__builtin_cpu_supports) {
+ cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports");
+ return {};
+ }
+ if (builtinID == Builtin::BI__builtin_cpu_init) {
+ cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init");
+ return {};
+ }
+
+ // Handle MSVC intrinsics before argument evaluation to prevent double
+ // evaluation.
+ assert(!cir::MissingFeatures::msvcBuiltins());
+
+ // Find out if any arguments are required to be integer constant expressions.
+ assert(!cir::MissingFeatures::handleBuiltinICEArguments());
+
+ switch (builtinID) {
+ default:
+ return {};
+ case X86::BI_mm_prefetch:
+ case X86::BI_mm_clflush:
+ case X86::BI_mm_lfence:
+ case X86::BI_mm_pause:
+ case X86::BI_mm_mfence:
+ case X86::BI_mm_sfence:
+ case X86::BI__rdtsc:
+ case X86::BI__builtin_ia32_rdtscp:
+ case X86::BI__builtin_ia32_lzcnt_u16:
+ case X86::BI__builtin_ia32_lzcnt_u32:
+ case X86::BI__builtin_ia32_lzcnt_u64:
+ case X86::BI__builtin_ia32_tzcnt_u16:
+ case X86::BI__builtin_ia32_tzcnt_u32:
+ case X86::BI__builtin_ia32_tzcnt_u64:
+ case X86::BI__builtin_ia32_undef128:
+ case X86::BI__builtin_ia32_undef256:
+ case X86::BI__builtin_ia32_undef512:
+ case X86::BI__builtin_ia32_vec_ext_v4hi:
+ case X86::BI__builtin_ia32_vec_ext_v16qi:
+ case X86::BI__builtin_ia32_vec_ext_v8hi:
+ case X86::BI__builtin_ia32_vec_ext_v4si:
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ case X86::BI__builtin_ia32_vec_ext_v2di:
+ case X86::BI__builtin_ia32_vec_ext_v32qi:
+ case X86::BI__builtin_ia32_vec_ext_v16hi:
+ case X86::BI__builtin_ia32_vec_ext_v8si:
+ case X86::BI__builtin_ia32_vec_ext_v4di:
+ case X86::BI__builtin_ia32_vec_set_v4hi:
+ case X86::BI__builtin_ia32_vec_set_v16qi:
+ case X86::BI__builtin_ia32_vec_set_v8hi:
+ case X86::BI__builtin_ia32_vec_set_v4si:
+ case X86::BI__builtin_ia32_vec_set_v2di:
+ case X86::BI__builtin_ia32_vec_set_v32qi:
+ case X86::BI__builtin_ia32_vec_set_v16hi:
+ case X86::BI__builtin_ia32_vec_set_v8si:
+ case X86::BI__builtin_ia32_vec_set_v4di:
+ case X86::BI_mm_setcsr:
+ case X86::BI__builtin_ia32_ldmxcsr:
+ case X86::BI_mm_getcsr:
+ case X86::BI__builtin_ia32_stmxcsr:
+ case X86::BI__builtin_ia32_xsave:
+ case X86::BI__builtin_ia32_xsave64:
+ case X86::BI__builtin_ia32_xrstor:
+ case X86::BI__builtin_ia32_xrstor64:
+ case X86::BI__builtin_ia32_xsaveopt:
+ case X86::BI__builtin_ia32_xsaveopt64:
+ case X86::BI__builtin_ia32_xrstors:
+ case X86::BI__builtin_ia32_xrstors64:
+ case X86::BI__builtin_ia32_xsavec:
+ case X86::BI__builtin_ia32_xsavec64:
+ case X86::BI__builtin_ia32_xsaves:
+ case X86::BI__builtin_ia32_xsaves64:
+ case X86::BI__builtin_ia32_xsetbv:
+ case X86::BI_xsetbv:
+ case X86::BI__builtin_ia32_xgetbv:
+ case X86::BI_xgetbv:
+ case X86::BI__builtin_ia32_storedqudi128_mask:
+ case X86::BI__builtin_ia32_storedqusi128_mask:
+ case X86::BI__builtin_ia32_storedquhi128_mask:
+ case X86::BI__builtin_ia32_storedquqi128_mask:
+ case X86::BI__builtin_ia32_storeupd128_mask:
+ case X86::BI__builtin_ia32_storeups128_mask:
+ case X86::BI__builtin_ia32_storedqudi256_mask:
+ case X86::BI__builtin_ia32_storedqusi256_mask:
+ case X86::BI__builtin_ia32_storedquhi256_mask:
+ case X86::BI__builtin_ia32_storedquqi256_mask:
+ case X86::BI__builtin_ia32_storeupd256_mask:
+ case X86::BI__builtin_ia32_storeups256_mask:
+ case X86::BI__builtin_ia32_storedqudi512_mask:
+ case X86::BI__builtin_ia32_storedqusi512_mask:
+ case X86::BI__builtin_ia32_storedquhi512_mask:
+ case X86::BI__builtin_ia32_storedquqi512_mask:
+ case X86::BI__builtin_ia32_storeupd512_mask:
+ case X86::BI__builtin_ia32_storeups512_mask:
+ case X86::BI__builtin_ia32_storesbf16128_mask:
+ case X86::BI__builtin_ia32_storesh128_mask:
+ case X86::BI__builtin_ia32_storess128_mask:
+ case X86::BI__builtin_ia32_storesd128_mask:
+ case X86::BI__builtin_ia32_cvtmask2b128:
+ case X86::BI__builtin_ia32_cvtmask2b256:
+ case X86::BI__builtin_ia32_cvtmask2b512:
+ case X86::BI__builtin_ia32_cvtmask2w128:
+ case X86::BI__builtin_ia32_cvtmask2w256:
+ case X86::BI__builtin_ia32_cvtmask2w512:
+ case X86::BI__builtin_ia32_cvtmask2d128:
+ case X86::BI__builtin_ia32_cvtmask2d256:
+ case X86::BI__builtin_ia32_cvtmask2d512:
+ case X86::BI__builtin_ia32_cvtmask2q128:
+ case X86::BI__builtin_ia32_cvtmask2q256:
+ case X86::BI__builtin_ia32_cvtmask2q512:
+ case X86::BI__builtin_ia32_cvtb2mask128:
+ case X86::BI__builtin_ia32_cvtb2mask256:
+ case X86::BI__builtin_ia32_cvtb2mask512:
+ case X86::BI__builtin_ia32_cvtw2mask128:
+ case X86::BI__builtin_ia32_cvtw2mask256:
+ case X86::BI__builtin_ia32_cvtw2mask512:
+ case X86::BI__builtin_ia32_cvtd2mask128:
+ case X86::BI__builtin_ia32_cvtd2mask256:
+ case X86::BI__builtin_ia32_cvtd2mask512:
+ case X86::BI__builtin_ia32_cvtq2mask128:
+ case X86::BI__builtin_ia32_cvtq2mask256:
+ case X86::BI__builtin_ia32_cvtq2mask512:
+ case X86::BI__builtin_ia32_cvtdq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtqq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+ case X86::BI__builtin_ia32_cvtudq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
+ case X86::BI__builtin_ia32_vfmaddss3:
+ case X86::BI__builtin_ia32_vfmaddsd3:
+ case X86::BI__builtin_ia32_vfmaddsh3_mask:
+ case X86::BI__builtin_ia32_vfmaddss3_mask:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask:
+ case X86::BI__builtin_ia32_vfmaddss:
+ case X86::BI__builtin_ia32_vfmaddsd:
+ case X86::BI__builtin_ia32_vfmaddsh3_maskz:
+ case X86::BI__builtin_ia32_vfmaddss3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsd3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsh3_mask3:
+ case X86::BI__builtin_ia32_vfmaddss3_mask3:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask3:
+ case X86::BI__builtin_ia32_vfmsubsh3_mask3:
+ case X86::BI__builtin_ia32_vfmsubss3_mask3:
+ case X86::BI__builtin_ia32_vfmsubsd3_mask3:
+ case X86::BI__builtin_ia32_vfmaddph512_mask:
+ case X86::BI__builtin_ia32_vfmaddph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddps512_mask:
+ case X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ case X86::BI__builtin_ia32_movdqa32store128_mask:
+ case X86::BI__builtin_ia32_movdqa64store128_mask:
+ case X86::BI__builtin_ia32_storeaps128_mask:
+ case X86::BI__builtin_ia32_storeapd128_mask:
+ case X86::BI__builtin_ia32_movdqa32store256_mask:
+ case X86::BI__builtin_ia32_movdqa64store256_mask:
+ case X86::BI__builtin_ia32_storeaps256_mask:
+ case X86::BI__builtin_ia32_storeapd256_mask:
+ case X86::BI__builtin_ia32_movdqa32store512_mask:
+ case X86::BI__builtin_ia32_movdqa64store512_mask:
+ case X86::BI__builtin_ia32_storeaps512_mask:
+ case X86::BI__builtin_ia32_storeapd512_mask:
+ case X86::BI__builtin_ia32_loadups128_mask:
+ case X86::BI__builtin_ia32_loadups256_mask:
+ case X86::BI__builtin_ia32_loadups512_mask:
+ case X86::BI__builtin_ia32_loadupd128_mask:
+ case X86::BI__builtin_ia32_loadupd256_mask:
+ case X86::BI__builtin_ia32_loadupd512_mask:
+ case X86::BI__builtin_ia32_loaddquqi128_mask:
+ case X86::BI__builtin_ia32_loaddquqi256_mask:
+ case X86::BI__builtin_ia32_loaddquqi512_mask:
+ case X86::BI__builtin_ia32_loaddquhi128_mask:
+ case X86::BI__builtin_ia32_loaddquhi256_mask:
+ case X86::BI__builtin_ia32_loaddquhi512_mask:
+ case X86::BI__builtin_ia32_loaddqusi128_mask:
+ case X86::BI__builtin_ia32_loaddqusi256_mask:
+ case X86::BI__builtin_ia32_loaddqusi512_mask:
+ case X86::BI__builtin_ia32_loaddqudi128_mask:
+ case X86::BI__builtin_ia32_loaddqudi256_mask:
+ case X86::BI__builtin_ia32_loaddqudi512_mask:
+ case X86::BI__builtin_ia32_loadsbf16128_mask:
+ case X86::BI__builtin_ia32_loadsh128_mask:
+ case X86::BI__builtin_ia32_loadss128_mask:
+ case X86::BI__builtin_ia32_loadsd128_mask:
+ case X86::BI__builtin_ia32_loadaps128_mask:
+ case X86::BI__builtin_ia32_loadaps256_mask:
+ case X86::BI__builtin_ia32_loadaps512_mask:
+ case X86::BI__builtin_ia32_loadapd128_mask:
+ case X86::BI__builtin_ia32_loadapd256_mask:
+ case X86::BI__builtin_ia32_loadapd512_mask:
+ case X86::BI__builtin_ia32_movdqa32load128_mask:
+ case X86::BI__builtin_ia32_movdqa32load256_mask:
+ case X86::BI__builtin_ia32_movdqa32load512_mask:
+ case X86::BI__builtin_ia32_movdqa64load128_mask:
+ case X86::BI__builtin_ia32_movdqa64load256_mask:
+ case X86::BI__builtin_ia32_movdqa64load512_mask:
+ case X86::BI__builtin_ia32_expandloaddf128_mask:
+ case X86::BI__builtin_ia32_expandloaddf256_mask:
+ case X86::BI__builtin_ia32_expandloaddf512_mask:
+ case X86::BI__builtin_ia32_expandloadsf128_mask:
+ case X86::BI__builtin_ia32_expandloadsf256_mask:
+ case X86::BI__builtin_ia32_expandloadsf512_mask:
+ case X86::BI__builtin_ia32_expandloaddi128_mask:
+ case X86::BI__builtin_ia32_expandloaddi256_mask:
+ case X86::BI__builtin_ia32_expandloaddi512_mask:
+ case X86::BI__builtin_ia32_expandloadsi128_mask:
+ case X86::BI__builtin_ia32_expandloadsi256_mask:
+ case X86::BI__builtin_ia32_expandloadsi512_mask:
+ case X86::BI__builtin_ia32_expandloadhi128_mask:
+ case X86::BI__builtin_ia32_expandloadhi256_mask:
+ case X86::BI__builtin_ia32_expandloadhi512_mask:
+ case X86::BI__builtin_ia32_expandloadqi128_mask:
+ case X86::BI__builtin_ia32_expandloadqi256_mask:
+ case X86::BI__builtin_ia32_expandloadqi512_mask:
+ case X86::BI__builtin_ia32_compressstoredf128_mask:
+ case X86::BI__builtin_ia32_compressstoredf256_mask:
+ case X86::BI__builtin_ia32_compressstoredf512_mask:
+ case X86::BI__builtin_ia32_compressstoresf128_mask:
+ case X86::BI__builtin_ia32_compressstoresf256_mask:
+ case X86::BI__builtin_ia32_compressstoresf512_mask:
+ case X86::BI__builtin_ia32_compressstoredi128_mask:
+ case X86::BI__builtin_ia32_compressstoredi256_mask:
+ case X86::BI__builtin_ia32_compressstoredi512_mask:
+ case X86::BI__builtin_ia32_compressstoresi128_mask:
+ case X86::BI__builtin_ia32_compressstoresi256_mask:
+ case X86::BI__builtin_ia32_compressstoresi512_mask:
+ case X86::BI__builtin_ia32_compressstorehi128_mask:
+ case X86::BI__builtin_ia32_compressstorehi256_mask:
+ case X86::BI__builtin_ia32_compressstorehi512_mask:
+ case X86::BI__builtin_ia32_compressstoreqi128_mask:
+ case X86::BI__builtin_ia32_compressstoreqi256_mask:
+ case X86::BI__builtin_ia32_compressstoreqi512_mask:
+ case X86::BI__builtin_ia32_expanddf128_mask:
+ case X86::BI__builtin_ia32_expanddf256_mask:
+ case X86::BI__builtin_ia32_expanddf512_mask:
+ case X86::BI__builtin_ia32_expandsf128_mask:
+ case X86::BI__builtin_ia32_expandsf256_mask:
+ case X86::BI__builtin_ia32_expandsf512_mask:
+ case X86::BI__builtin_ia32_expanddi128_mask:
+ case X86::BI__builtin_ia32_expanddi256_mask:
+ case X86::BI__builtin_ia32_expanddi512_mask:
+ case X86::BI__builtin_ia32_expandsi128_mask:
+ case X86::BI__builtin_ia32_expandsi256_mask:
+ case X86::BI__builtin_ia32_expandsi512_mask:
+ case X86::BI__builtin_ia32_expandhi128_mask:
+ case X86::BI__builtin_ia32_expandhi256_mask:
+ case X86::BI__builtin_ia32_expandhi512_mask:
+ case X86::BI__builtin_ia32_expandqi128_mask:
+ case X86::BI__builtin_ia32_expandqi256_mask:
+ case X86::BI__builtin_ia32_expandqi512_mask:
+ case X86::BI__builtin_ia32_compressdf128_mask:
+ case X86::BI__builtin_ia32_compressdf256_mask:
+ case X86::BI__builtin_ia32_compressdf512_mask:
+ case X86::BI__builtin_ia32_compresssf128_mask:
+ case X86::BI__builtin_ia32_compresssf256_mask:
+ case X86::BI__builtin_ia32_compresssf512_mask:
+ case X86::BI__builtin_ia32_compressdi128_mask:
+ case X86::BI__builtin_ia32_compressdi256_mask:
+ case X86::BI__builtin_ia32_compressdi512_mask:
+ case X86::BI__builtin_ia32_compresssi128_mask:
+ case X86::BI__builtin_ia32_compresssi256_mask:
+ case X86::BI__builtin_ia32_compresssi512_mask:
+ case X86::BI__builtin_ia32_compresshi128_mask:
+ case X86::BI__builtin_ia32_compresshi256_mask:
+ case X86::BI__builtin_ia32_compresshi512_mask:
+ case X86::BI__builtin_ia32_compressqi128_mask:
+ case X86::BI__builtin_ia32_compressqi256_mask:
+ case X86::BI__builtin_ia32_compressqi512_mask:
+ case X86::BI__builtin_ia32_gather3div2df:
+ case X86::BI__builtin_ia32_gather3div2di:
+ case X86::BI__builtin_ia32_gather3div4df:
+ case X86::BI__builtin_ia32_gather3div4di:
+ case X86::BI__builtin_ia32_gather3div4sf:
+ case X86::BI__builtin_ia32_gather3div4si:
+ case X86::BI__builtin_ia32_gather3div8sf:
+ case X86::BI__builtin_ia32_gather3div8si:
+ case X86::BI__builtin_ia32_gather3siv2df:
+ case X86::BI__builtin_ia32_gather3siv2di:
+ case X86::BI__builtin_ia32_gather3siv4df:
+ case X86::BI__builtin_ia32_gather3siv4di:
+ case X86::BI__builtin_ia32_gather3siv4sf:
+ case X86::BI__builtin_ia32_gather3siv4si:
+ case X86::BI__builtin_ia32_gather3siv8sf:
+ case X86::BI__builtin_ia32_gather3siv8si:
+ case X86::BI__builtin_ia32_gathersiv8df:
+ case X86::BI__builtin_ia32_gathersiv16sf:
+ case X86::BI__builtin_ia32_gatherdiv8df:
+ case X86::BI__builtin_ia32_gatherdiv16sf:
+ case X86::BI__builtin_ia32_gathersiv8di:
+ case X86::BI__builtin_ia32_gathersiv16si:
+ case X86::BI__builtin_ia32_gatherdiv8di:
+ case X86::BI__builtin_ia32_gatherdiv16si:
+ case X86::BI__builtin_ia32_scattersiv8df:
+ case X86::BI__builtin_ia32_scattersiv16sf:
+ case X86::BI__builtin_ia32_scatterdiv8df:
+ case X86::BI__builtin_ia32_scatterdiv16sf:
+ case X86::BI__builtin_ia32_scattersiv8di:
+ case X86::BI__builtin_ia32_scattersiv16si:
+ case X86::BI__builtin_ia32_scatterdiv8di:
+ case X86::BI__builtin_ia32_scatterdiv16si:
+ case X86::BI__builtin_ia32_scatterdiv2df:
+ case X86::BI__builtin_ia32_scatterdiv2di:
+ case X86::BI__builtin_ia32_scatterdiv4df:
+ case X86::BI__builtin_ia32_scatterdiv4di:
+ case X86::BI__builtin_ia32_scatterdiv4sf:
+ case X86::BI__builtin_ia32_scatterdiv4si:
+ case X86::BI__builtin_ia32_scatterdiv8sf:
+ case X86::BI__builtin_ia32_scatterdiv8si:
+ case X86::BI__builtin_ia32_scattersiv2df:
+ case X86::BI__builtin_ia32_scattersiv2di:
+ case X86::BI__builtin_ia32_scattersiv4df:
+ case X86::BI__builtin_ia32_scattersiv4di:
+ case X86::BI__builtin_ia32_scattersiv4sf:
+ case X86::BI__builtin_ia32_scattersiv4si:
+ case X86::BI__builtin_ia32_scattersiv8sf:
+ case X86::BI__builtin_ia32_scattersiv8si:
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ case X86::BI__builtin_ia32_vinsertf128_pd256:
+ case X86::BI__builtin_ia32_vinsertf128_ps256:
+ case X86::BI__builtin_ia32_vinsertf128_si256:
+ case X86::BI__builtin_ia32_insert128i256:
+ case X86::BI__builtin_ia32_insertf64x4:
+ case X86::BI__builtin_ia32_insertf32x4:
+ case X86::BI__builtin_ia32_inserti64x4:
+ case X86::BI__builtin_ia32_inserti32x4:
+ case X86::BI__builtin_ia32_insertf32x8:
+ case X86::BI__builtin_ia32_inserti32x8:
+ case X86::BI__builtin_ia32_insertf32x4_256:
+ case X86::BI__builtin_ia32_inserti32x4_256:
+ case X86::BI__builtin_ia32_insertf64x2_256:
+ case X86::BI__builtin_ia32_inserti64x2_256:
+ case X86::BI__builtin_ia32_insertf64x2_512:
+ case X86::BI__builtin_ia32_inserti64x2_512:
+ case X86::BI__builtin_ia32_pmovqd512_mask:
+ case X86::BI__builtin_ia32_pmovwb512_mask:
+ case X86::BI__builtin_ia32_pblendw128:
+ case X86::BI__builtin_ia32_blendpd:
+ case X86::BI__builtin_ia32_blendps:
+ case X86::BI__builtin_ia32_blendpd256:
+ case X86::BI__builtin_ia32_blendps256:
+ case X86::BI__builtin_ia32_pblendw256:
+ case X86::BI__builtin_ia32_pblendd128:
+ case X86::BI__builtin_ia32_pblendd256:
+ case X86::BI__builtin_ia32_pshuflw:
+ case X86::BI__builtin_ia32_pshuflw256:
+ case X86::BI__builtin_ia32_pshuflw512:
+ case X86::BI__builtin_ia32_pshufhw:
+ case X86::BI__builtin_ia32_pshufhw256:
+ case X86::BI__builtin_ia32_pshufhw512:
+ case X86::BI__builtin_ia32_pshufd:
+ case X86::BI__builtin_ia32_pshufd256:
+ case X86::BI__builtin_ia32_pshufd512:
+ case X86::BI__builtin_ia32_vpermilpd:
+ case X86::BI__builtin_ia32_vpermilps:
+ case X86::BI__builtin_ia32_vpermilpd256:
+ case X86::BI__builtin_ia32_vpermilps256:
+ case X86::BI__builtin_ia32_vpermilpd512:
+ case X86::BI__builtin_ia32_vpermilps512:
+ case X86::BI__builtin_ia32_shufpd:
+ case X86::BI__builtin_ia32_shufpd256:
+ case X86::BI__builtin_ia32_shufpd512:
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512:
+ case X86::BI__builtin_ia32_permdi256:
+ case X86::BI__builtin_ia32_permdf256:
+ case X86::BI__builtin_ia32_permdi512:
+ case X86::BI__builtin_ia32_permdf512:
+ case X86::BI__builtin_ia32_palignr128:
+ case X86::BI__builtin_ia32_palignr256:
+ case X86::BI__builtin_ia32_palignr512:
+ case X86::BI__builtin_ia32_alignd128:
+ case X86::BI__builtin_ia32_alignd256:
+ case X86::BI__builtin_ia32_alignd512:
+ case X86::BI__builtin_ia32_alignq128:
+ case X86::BI__builtin_ia32_alignq256:
+ case X86::BI__builtin_ia32_alignq512:
+ case X86::BI__builtin_ia32_shuf_f32x4_256:
+ case X86::BI__builtin_ia32_shuf_f64x2_256:
+ case X86::BI__builtin_ia32_shuf_i32x4_256:
+ case X86::BI__builtin_ia32_shuf_i64x2_256:
+ case X86::BI__builtin_ia32_shuf_f32x4:
+ case X86::BI__builtin_ia32_shuf_f64x2:
+ case X86::BI__builtin_ia32_shuf_i32x4:
+ case X86::BI__builtin_ia32_shuf_i64x2:
+ case X86::BI__builtin_ia32_vperm2f128_pd256:
+ case X86::BI__builtin_ia32_vperm2f128_ps256:
+ case X86::BI__builtin_ia32_vperm2f128_si256:
+ case X86::BI__builtin_ia32_permti256:
+ case X86::BI__builtin_ia32_pslldqi128_byteshift:
+ case X86::BI__builtin_ia32_pslldqi256_byteshift:
+ case X86::BI__builtin_ia32_pslldqi512_byteshift:
+ case X86::BI__builtin_ia32_psrldqi128_byteshift:
+ case X86::BI__builtin_ia32_psrldqi256_byteshift:
+ case X86::BI__builtin_ia32_psrldqi512_byteshift:
+ case X86::BI__builtin_ia32_kshiftliqi:
+ case X86::BI__builtin_ia32_kshiftlihi:
+ case X86::BI__builtin_ia32_kshiftlisi:
+ case X86::BI__builtin_ia32_kshiftlidi:
+ case X86::BI__builtin_ia32_kshiftriqi:
+ case X86::BI__builtin_ia32_kshiftrihi:
+ case X86::BI__builtin_ia32_kshiftrisi:
+ case X86::BI__builtin_ia32_kshiftridi:
+ case X86::BI__builtin_ia32_vprotbi:
+ case X86::BI__builtin_ia32_vprotwi:
+ case X86::BI__builtin_ia32_vprotdi:
+ case X86::BI__builtin_ia32_vprotqi:
+ case X86::BI__builtin_ia32_prold128:
+ case X86::BI__builtin_ia32_prold256:
+ case X86::BI__builtin_ia32_prold512:
+ case X86::BI__builtin_ia32_prolq128:
+ case X86::BI__builtin_ia32_prolq256:
+ case X86::BI__builtin_ia32_prolq512:
+ case X86::BI__builtin_ia32_prord128:
+ case X86::BI__builtin_ia32_prord256:
+ case X86::BI__builtin_ia32_prord512:
+ case X86::BI__builtin_ia32_prorq128:
+ case X86::BI__builtin_ia32_prorq256:
+ case X86::BI__builtin_ia32_prorq512:
+ case X86::BI__builtin_ia32_selectb_128:
+ case X86::BI__builtin_ia32_selectb_256:
+ case X86::BI__builtin_ia32_selectb_512:
+ case X86::BI__builtin_ia32_selectw_128:
+ case X86::BI__builtin_ia32_selectw_256:
+ case X86::BI__builtin_ia32_selectw_512:
+ case X86::BI__builtin_ia32_selectd_128:
+ case X86::BI__builtin_ia32_selectd_256:
+ case X86::BI__builtin_ia32_selectd_512:
+ case X86::BI__builtin_ia32_selectq_128:
+ case X86::BI__builtin_ia32_selectq_256:
+ case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectph_128:
+ case X86::BI__builtin_ia32_selectph_256:
+ case X86::BI__builtin_ia32_selectph_512:
+ case X86::BI__builtin_ia32_selectpbf_128:
+ case X86::BI__builtin_ia32_selectpbf_256:
+ case X86::BI__builtin_ia32_selectpbf_512:
+ case X86::BI__builtin_ia32_selectps_128:
+ case X86::BI__builtin_ia32_selectps_256:
+ case X86::BI__builtin_ia32_selectps_512:
+ case X86::BI__builtin_ia32_selectpd_128:
+ case X86::BI__builtin_ia32_selectpd_256:
+ case X86::BI__builtin_ia32_selectpd_512:
+ case X86::BI__builtin_ia32_selectsh_128:
+ case X86::BI__builtin_ia32_selectsbf_128:
+ case X86::BI__builtin_ia32_selectss_128:
+ case X86::BI__builtin_ia32_selectsd_128:
+ case X86::BI__builtin_ia32_cmpb128_mask:
+ case X86::BI__builtin_ia32_cmpb256_mask:
+ case X86::BI__builtin_ia32_cmpb512_mask:
+ case X86::BI__builtin_ia32_cmpw128_mask:
+ case X86::BI__builtin_ia32_cmpw256_mask:
+ case X86::BI__builtin_ia32_cmpw512_mask:
+ case X86::BI__builtin_ia32_cmpd128_mask:
+ case X86::BI__builtin_ia32_cmpd256_mask:
+ case X86::BI__builtin_ia32_cmpd512_mask:
+ case X86::BI__builtin_ia32_cmpq128_mask:
+ case X86::BI__builtin_ia32_cmpq256_mask:
+ case X86::BI__builtin_ia32_cmpq512_mask:
+ case X86::BI__builtin_ia32_ucmpb128_mask:
+ case X86::BI__builtin_ia32_ucmpb256_mask:
+ case X86::BI__builtin_ia32_ucmpb512_mask:
+ case X86::BI__builtin_ia32_ucmpw128_mask:
+ case X86::BI__builtin_ia32_ucmpw256_mask:
+ case X86::BI__builtin_ia32_ucmpw512_mask:
+ case X86::BI__builtin_ia32_ucmpd128_mask:
+ case X86::BI__builtin_ia32_ucmpd256_mask:
+ case X86::BI__builtin_ia32_ucmpd512_mask:
+ case X86::BI__builtin_ia32_ucmpq128_mask:
+ case X86::BI__builtin_ia32_ucmpq256_mask:
+ case X86::BI__builtin_ia32_ucmpq512_mask:
+ case X86::BI__builtin_ia32_vpcomb:
+ case X86::BI__builtin_ia32_vpcomw:
+ case X86::BI__builtin_ia32_vpcomd:
+ case X86::BI__builtin_ia32_vpcomq:
+ case X86::BI__builtin_ia32_vpcomub:
+ case X86::BI__builtin_ia32_vpcomuw:
+ case X86::BI__builtin_ia32_vpcomud:
+ case X86::BI__builtin_ia32_vpcomuq:
+ case X86::BI__builtin_ia32_kortestcqi:
+ case X86::BI__builtin_ia32_kortestchi:
+ case X86::BI__builtin_ia32_kortestcsi:
+ case X86::BI__builtin_ia32_kortestcdi:
+ case X86::BI__builtin_ia32_kortestzqi:
+ case X86::BI__builtin_ia32_kortestzhi:
+ case X86::BI__builtin_ia32_kortestzsi:
+ case X86::BI__builtin_ia32_kortestzdi:
+ case X86::BI__builtin_ia32_ktestcqi:
+ case X86::BI__builtin_ia32_ktestzqi:
+ case X86::BI__builtin_ia32_ktestchi:
+ case X86::BI__builtin_ia32_ktestzhi:
+ case X86::BI__builtin_ia32_ktestcsi:
+ case X86::BI__builtin_ia32_ktestzsi:
+ case X86::BI__builtin_ia32_ktestcdi:
+ case X86::BI__builtin_ia32_ktestzdi:
+ case X86::BI__builtin_ia32_kaddqi:
+ case X86::BI__builtin_ia32_kaddhi:
+ case X86::BI__builtin_ia32_kaddsi:
+ case X86::BI__builtin_ia32_kadddi:
+ case X86::BI__builtin_ia32_kandqi:
+ case X86::BI__builtin_ia32_kandhi:
+ case X86::BI__builtin_ia32_kandsi:
+ case X86::BI__builtin_ia32_kanddi:
+ case X86::BI__builtin_ia32_kandnqi:
+ case X86::BI__builtin_ia32_kandnhi:
+ case X86::BI__builtin_ia32_kandnsi:
+ case X86::BI__builtin_ia32_kandndi:
+ case X86::BI__builtin_ia32_korqi:
+ case X86::BI__builtin_ia32_korhi:
+ case X86::BI__builtin_ia32_korsi:
+ case X86::BI__builtin_ia32_kordi:
+ case X86::BI__builtin_ia32_kxnorqi:
+ case X86::BI__builtin_ia32_kxnorhi:
+ case X86::BI__builtin_ia32_kxnorsi:
+ case X86::BI__builtin_ia32_kxnordi:
+ case X86::BI__builtin_ia32_kxorqi:
+ case X86::BI__builtin_ia32_kxorhi:
+ case X86::BI__builtin_ia32_kxorsi:
+ case X86::BI__builtin_ia32_kxordi:
+ case X86::BI__builtin_ia32_knotqi:
+ case X86::BI__builtin_ia32_knothi:
+ case X86::BI__builtin_ia32_knotsi:
+ case X86::BI__builtin_ia32_knotdi:
+ case X86::BI__builtin_ia32_kmovb:
+ case X86::BI__builtin_ia32_kmovw:
+ case X86::BI__builtin_ia32_kmovd:
+ case X86::BI__builtin_ia32_kmovq:
+ case X86::BI__builtin_ia32_kunpckdi:
+ case X86::BI__builtin_ia32_kunpcksi:
+ case X86::BI__builtin_ia32_kunpckhi:
+ case X86::BI__builtin_ia32_sqrtsh_round_mask:
+ case X86::BI__builtin_ia32_sqrtsd_round_mask:
+ case X86::BI__builtin_ia32_sqrtss_round_mask:
+ case X86::BI__builtin_ia32_sqrtpd256:
+ case X86::BI__builtin_ia32_sqrtpd:
+ case X86::BI__builtin_ia32_sqrtps256:
+ case X86::BI__builtin_ia32_sqrtps:
+ case X86::BI__builtin_ia32_sqrtph256:
+ case X86::BI__builtin_ia32_sqrtph:
+ case X86::BI__builtin_ia32_sqrtph512:
+ case X86::BI__builtin_ia32_vsqrtbf16256:
+ case X86::BI__builtin_ia32_vsqrtbf16:
+ case X86::BI__builtin_ia32_vsqrtbf16512:
+ case X86::BI__builtin_ia32_sqrtps512:
+ case X86::BI__builtin_ia32_sqrtpd512:
+ case X86::BI__builtin_ia32_pmuludq128:
+ case X86::BI__builtin_ia32_pmuludq256:
+ case X86::BI__builtin_ia32_pmuludq512:
+ case X86::BI__builtin_ia32_pmuldq128:
+ case X86::BI__builtin_ia32_pmuldq256:
+ case X86::BI__builtin_ia32_pmuldq512:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_vpshldd128:
+ case X86::BI__builtin_ia32_vpshldd256:
+ case X86::BI__builtin_ia32_vpshldd512:
+ case X86::BI__builtin_ia32_vpshldq128:
+ case X86::BI__builtin_ia32_vpshldq256:
+ case X86::BI__builtin_ia32_vpshldq512:
+ case X86::BI__builtin_ia32_vpshldw128:
+ case X86::BI__builtin_ia32_vpshldw256:
+ case X86::BI__builtin_ia32_vpshldw512:
+ case X86::BI__builtin_ia32_vpshrdd128:
+ case X86::BI__builtin_ia32_vpshrdd256:
+ case X86::BI__builtin_ia32_vpshrdd512:
+ case X86::BI__builtin_ia32_vpshrdq128:
+ case X86::BI__builtin_ia32_vpshrdq256:
+ case X86::BI__builtin_ia32_vpshrdq512:
+ case X86::BI__builtin_ia32_vpshrdw128:
+ case X86::BI__builtin_ia32_vpshrdw256:
+ case X86::BI__builtin_ia32_vpshrdw512:
+ case X86::BI__builtin_ia32_reduce_fadd_pd512:
+ case X86::BI__builtin_ia32_reduce_fadd_ps512:
+ case X86::BI__builtin_ia32_reduce_fadd_ph512:
+ case X86::BI__builtin_ia32_reduce_fadd_ph256:
+ case X86::BI__builtin_ia32_reduce_fadd_ph128:
+ case X86::BI__builtin_ia32_reduce_fmul_pd512:
+ case X86::BI__builtin_ia32_reduce_fmul_ps512:
+ case X86::BI__builtin_ia32_reduce_fmul_ph512:
+ case X86::BI__builtin_ia32_reduce_fmul_ph256:
+ case X86::BI__builtin_ia32_reduce_fmul_ph128:
+ case X86::BI__builtin_ia32_reduce_fmax_pd512:
+ case X86::BI__builtin_ia32_reduce_fmax_ps512:
+ case X86::BI__builtin_ia32_reduce_fmax_ph512:
+ case X86::BI__builtin_ia32_reduce_fmax_ph256:
+ case X86::BI__builtin_ia32_reduce_fmax_ph128:
+ case X86::BI__builtin_ia32_reduce_fmin_pd512:
+ case X86::BI__builtin_ia32_reduce_fmin_ps512:
+ case X86::BI__builtin_ia32_reduce_fmin_ph512:
+ case X86::BI__builtin_ia32_reduce_fmin_ph256:
+ case X86::BI__builtin_ia32_reduce_fmin_ph128:
+ case X86::BI__builtin_ia32_rdrand16_step:
+ case X86::BI__builtin_ia32_rdrand32_step:
+ case X86::BI__builtin_ia32_rdrand64_step:
+ case X86::BI__builtin_ia32_rdseed16_step:
+ case X86::BI__builtin_ia32_rdseed32_step:
+ case X86::BI__builtin_ia32_rdseed64_step:
+ case X86::BI__builtin_ia32_addcarryx_u32:
+ case X86::BI__builtin_ia32_addcarryx_u64:
+ case X86::BI__builtin_ia32_subborrow_u32:
+ case X86::BI__builtin_ia32_subborrow_u64:
+ case X86::BI__builtin_ia32_fpclassps128_mask:
+ case X86::BI__builtin_ia32_fpclassps256_mask:
+ case X86::BI__builtin_ia32_fpclassps512_mask:
+ case X86::BI__builtin_ia32_vfpclassbf16128_mask:
+ case X86::BI__builtin_ia32_vfpclassbf16256_mask:
+ case X86::BI__builtin_ia32_vfpclassbf16512_mask:
+ case X86::BI__builtin_ia32_fpclassph128_mask:
+ case X86::BI__builtin_ia32_fpclassph256_mask:
+ case X86::BI__builtin_ia32_fpclassph512_mask:
+ case X86::BI__builtin_ia32_fpclasspd128_mask:
+ case X86::BI__builtin_ia32_fpclasspd256_mask:
+ case X86::BI__builtin_ia32_fpclasspd512_mask:
+ case X86::BI__builtin_ia32_vp2intersect_q_512:
+ case X86::BI__builtin_ia32_vp2intersect_q_256:
+ case X86::BI__builtin_ia32_vp2intersect_q_128:
+ case X86::BI__builtin_ia32_vp2intersect_d_512:
+ case X86::BI__builtin_ia32_vp2intersect_d_256:
+ case X86::BI__builtin_ia32_vp2intersect_d_128:
+ case X86::BI__builtin_ia32_vpmultishiftqb128:
+ case X86::BI__builtin_ia32_vpmultishiftqb256:
+ case X86::BI__builtin_ia32_vpmultishiftqb512:
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltpd:
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmplepd:
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordpd:
+ case X86::BI__builtin_ia32_cmpph128_mask:
+ case X86::BI__builtin_ia32_cmpph256_mask:
+ case X86::BI__builtin_ia32_cmpph512_mask:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ case X86::BI__builtin_ia32_vcmpbf16512_mask:
+ case X86::BI__builtin_ia32_vcmpbf16256_mask:
+ case X86::BI__builtin_ia32_vcmpbf16128_mask:
+ case X86::BI__builtin_ia32_cmpps:
+ case X86::BI__builtin_ia32_cmpps256:
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpeqsd:
+ case X86::BI__builtin_ia32_cmpltsd:
+ case X86::BI__builtin_ia32_cmplesd:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ case X86::BI__builtin_ia32_cmpordsd:
+ case X86::BI__builtin_ia32_vcvtph2ps_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps256_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
+ case X86::BI__builtin_ia32_cvtsbf162ss_32:
+ case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+ case X86::BI__cpuid:
+ case X86::BI__cpuidex:
+ case X86::BI__emul:
+ case X86::BI__emulu:
+ case X86::BI__mulh:
+ case X86::BI__umulh:
+ case X86::BI_mul128:
+ case X86::BI_umul128:
+ case X86::BI__faststorefence:
+ case X86::BI__shiftleft128:
+ case X86::BI__shiftright128:
+ case X86::BI_ReadWriteBarrier:
+ case X86::BI_ReadBarrier:
+ case X86::BI_WriteBarrier:
+ case X86::BI_AddressOfReturnAddress:
+ case X86::BI__stosb:
+ case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
+ case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
+ case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
+ case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
+ case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
+ case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
+ case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
+ case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
+ case X86::BI__ud2:
+ case X86::BI__int2c:
+ case X86::BI__readfsbyte:
+ case X86::BI__readfsword:
+ case X86::BI__readfsdword:
+ case X86::BI__readfsqword:
+ case X86::BI__readgsbyte:
+ case X86::BI__readgsword:
+ case X86::BI__readgsdword:
+ case X86::BI__readgsqword:
+ case X86::BI__builtin_ia32_encodekey128_u32:
+ case X86::BI__builtin_ia32_encodekey256_u32:
+ case X86::BI__builtin_ia32_aesenc128kl_u8:
+ case X86::BI__builtin_ia32_aesdec128kl_u8:
+ case X86::BI__builtin_ia32_aesenc256kl_u8:
+ case X86::BI__builtin_ia32_aesdec256kl_u8:
+ case X86::BI__builtin_ia32_aesencwide128kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide128kl_u8:
+ case X86::BI__builtin_ia32_aesencwide256kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide256kl_u8:
+ case X86::BI__builtin_ia32_vfcmaddcph512_mask:
+ case X86::BI__builtin_ia32_vfmaddcph512_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
+ case X86::BI__builtin_ia32_prefetchi:
+ cgm.errorNYI(e->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ }
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
index df42af8..eef3739 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
@@ -37,6 +37,10 @@ CIRGenCXXABI::AddedStructorArgCounts CIRGenCXXABI::addImplicitConstructorArgs(
addedArgs.suffix.size());
}
+CatchTypeInfo CIRGenCXXABI::getCatchAllTypeInfo() {
+ return CatchTypeInfo{{}, 0};
+}
+
void CIRGenCXXABI::buildThisParam(CIRGenFunction &cgf,
FunctionArgList &params) {
const auto *md = cast<CXXMethodDecl>(cgf.curGD.getDecl());
@@ -81,8 +85,7 @@ CharUnits CIRGenCXXABI::getArrayCookieSize(const CXXNewExpr *e) {
if (!requiresArrayCookie(e))
return CharUnits::Zero();
- cgm.errorNYI(e->getSourceRange(), "CIRGenCXXABI::getArrayCookieSize");
- return CharUnits::Zero();
+ return getArrayCookieSizeImpl(e->getAllocatedType());
}
bool CIRGenCXXABI::requiresArrayCookie(const CXXNewExpr *e) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
index 6d3741c4..c78f9b0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
@@ -15,6 +15,7 @@
#define LLVM_CLANG_LIB_CIR_CIRGENCXXABI_H
#include "CIRGenCall.h"
+#include "CIRGenCleanup.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
@@ -155,6 +156,8 @@ public:
/// Loads the incoming C++ this pointer as it was passed by the caller.
mlir::Value loadIncomingCXXThis(CIRGenFunction &cgf);
+ virtual CatchTypeInfo getCatchAllTypeInfo();
+
/// Get the implicit (second) parameter that comes after the "this" pointer,
/// or nullptr if there is isn't one.
virtual mlir::Value getCXXDestructorImplicitParam(CIRGenFunction &cgf,
@@ -299,8 +302,28 @@ public:
/// - non-array allocations never need a cookie
/// - calls to \::operator new(size_t, void*) never need a cookie
///
- /// \param E - the new-expression being allocated.
+ /// \param e - the new-expression being allocated.
virtual CharUnits getArrayCookieSize(const CXXNewExpr *e);
+
+ /// Initialize the array cookie for the given allocation.
+ ///
+ /// \param newPtr - a char* which is the presumed-non-null
+ /// return value of the allocation function
+ /// \param numElements - the computed number of elements,
+ /// potentially collapsed from the multidimensional array case;
+ /// always a size_t
+ /// \param elementType - the base element allocated type,
+ /// i.e. the allocated type after stripping all array types
+ virtual Address initializeArrayCookie(CIRGenFunction &cgf, Address newPtr,
+ mlir::Value numElements,
+ const CXXNewExpr *e,
+ QualType elementType) = 0;
+
+protected:
+ /// Returns the extra size required in order to store the array
+ /// cookie for the given type. Assumes that an array cookie is
+ /// required.
+ virtual CharUnits getArrayCookieSizeImpl(QualType elementType) = 0;
};
/// Creates and Itanium-family ABI
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
index 8700697..851328a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
@@ -28,6 +28,46 @@ using namespace clang::CIRGen;
// CIRGenFunction cleanup related
//===----------------------------------------------------------------------===//
+/// Build a unconditional branch to the lexical scope cleanup block
+/// or with the labeled blocked if already solved.
+///
+/// Track on scope basis, goto's we need to fix later.
+cir::BrOp CIRGenFunction::emitBranchThroughCleanup(mlir::Location loc,
+ JumpDest dest) {
+ // Insert a branch: to the cleanup block (unsolved) or to the already
+ // materialized label. Keep track of unsolved goto's.
+ assert(dest.getBlock() && "assumes incoming valid dest");
+ auto brOp = cir::BrOp::create(builder, loc, dest.getBlock());
+
+ // Calculate the innermost active normal cleanup.
+ EHScopeStack::stable_iterator topCleanup =
+ ehStack.getInnermostActiveNormalCleanup();
+
+ // If we're not in an active normal cleanup scope, or if the
+ // destination scope is within the innermost active normal cleanup
+ // scope, we don't need to worry about fixups.
+ if (topCleanup == ehStack.stable_end() ||
+ topCleanup.encloses(dest.getScopeDepth())) { // works for invalid
+ // FIXME(cir): should we clear insertion point here?
+ return brOp;
+ }
+
+ // If we can't resolve the destination cleanup scope, just add this
+ // to the current cleanup scope as a branch fixup.
+ if (!dest.getScopeDepth().isValid()) {
+ BranchFixup &fixup = ehStack.addBranchFixup();
+ fixup.destination = dest.getBlock();
+ fixup.destinationIndex = dest.getDestIndex();
+ fixup.initialBranch = brOp;
+ fixup.optimisticBranchBlock = nullptr;
+ // FIXME(cir): should we clear insertion point here?
+ return brOp;
+ }
+
+ cgm.errorNYI(loc, "emitBranchThroughCleanup: valid destination scope depth");
+ return brOp;
+}
+
/// Emits all the code to cause the given temporary to be cleaned up.
void CIRGenFunction::emitCXXTemporary(const CXXTemporary *temporary,
QualType tempType, Address ptr) {
@@ -40,6 +80,19 @@ void CIRGenFunction::emitCXXTemporary(const CXXTemporary *temporary,
void EHScopeStack::Cleanup::anchor() {}
+EHScopeStack::stable_iterator
+EHScopeStack::getInnermostActiveNormalCleanup() const {
+ stable_iterator si = getInnermostNormalCleanup();
+ stable_iterator se = stable_end();
+ while (si != se) {
+ EHCleanupScope &cleanup = llvm::cast<EHCleanupScope>(*find(si));
+ if (cleanup.isActive())
+ return si;
+ si = cleanup.getEnclosingNormalCleanup();
+ }
+ return stable_end();
+}
+
/// Push an entry of the given size onto this protected-scope stack.
char *EHScopeStack::allocate(size_t size) {
size = llvm::alignTo(size, ScopeStackAlignment);
@@ -75,14 +128,30 @@ void EHScopeStack::deallocate(size_t size) {
startOfData += llvm::alignTo(size, ScopeStackAlignment);
}
+/// Remove any 'null' fixups on the stack. However, we can't pop more
+/// fixups than the fixup depth on the innermost normal cleanup, or
+/// else fixups that we try to add to that cleanup will end up in the
+/// wrong place. We *could* try to shrink fixup depths, but that's
+/// actually a lot of work for little benefit.
+void EHScopeStack::popNullFixups() {
+ // We expect this to only be called when there's still an innermost
+ // normal cleanup; otherwise there really shouldn't be any fixups.
+ cgf->cgm.errorNYI("popNullFixups");
+}
+
void *EHScopeStack::pushCleanup(CleanupKind kind, size_t size) {
char *buffer = allocate(EHCleanupScope::getSizeForCleanupSize(size));
+ bool isNormalCleanup = kind & NormalCleanup;
bool isEHCleanup = kind & EHCleanup;
bool isLifetimeMarker = kind & LifetimeMarker;
assert(!cir::MissingFeatures::innermostEHScope());
- EHCleanupScope *scope = new (buffer) EHCleanupScope(size);
+ EHCleanupScope *scope = new (buffer)
+ EHCleanupScope(size, branchFixups.size(), innermostNormalCleanup);
+
+ if (isNormalCleanup)
+ innermostNormalCleanup = stable_begin();
if (isLifetimeMarker)
cgf->cgm.errorNYI("push lifetime marker cleanup");
@@ -100,12 +169,30 @@ void EHScopeStack::popCleanup() {
assert(isa<EHCleanupScope>(*begin()));
EHCleanupScope &cleanup = cast<EHCleanupScope>(*begin());
+ innermostNormalCleanup = cleanup.getEnclosingNormalCleanup();
deallocate(cleanup.getAllocatedSize());
// Destroy the cleanup.
cleanup.destroy();
- assert(!cir::MissingFeatures::ehCleanupBranchFixups());
+ // Check whether we can shrink the branch-fixups stack.
+ if (!branchFixups.empty()) {
+ // If we no longer have any normal cleanups, all the fixups are
+ // complete.
+ if (!hasNormalCleanups()) {
+ branchFixups.clear();
+ } else {
+ // Otherwise we can still trim out unnecessary nulls.
+ popNullFixups();
+ }
+ }
+}
+
+EHCatchScope *EHScopeStack::pushCatch(unsigned numHandlers) {
+ char *buffer = allocate(EHCatchScope::getSizeForNumHandlers(numHandlers));
+ assert(!cir::MissingFeatures::innermostEHScope());
+ EHCatchScope *scope = new (buffer) EHCatchScope(numHandlers);
+ return scope;
}
static void emitCleanup(CIRGenFunction &cgf, EHScopeStack::Cleanup *cleanup) {
@@ -116,6 +203,18 @@ static void emitCleanup(CIRGenFunction &cgf, EHScopeStack::Cleanup *cleanup) {
assert(cgf.haveInsertPoint() && "cleanup ended with no insertion point?");
}
+static mlir::Block *createNormalEntry(CIRGenFunction &cgf,
+ EHCleanupScope &scope) {
+ assert(scope.isNormalCleanup());
+ mlir::Block *entry = scope.getNormalBlock();
+ if (!entry) {
+ mlir::OpBuilder::InsertionGuard guard(cgf.getBuilder());
+ entry = cgf.curLexScope->getOrCreateCleanupBlock(cgf.getBuilder());
+ scope.setNormalBlock(entry);
+ }
+ return entry;
+}
+
/// Pops a cleanup block. If the block includes a normal cleanup, the
/// current insertion point is threaded through the cleanup, as are
/// any branch fixups on the cleanup.
@@ -123,17 +222,21 @@ void CIRGenFunction::popCleanupBlock() {
assert(!ehStack.empty() && "cleanup stack is empty!");
assert(isa<EHCleanupScope>(*ehStack.begin()) && "top not a cleanup!");
EHCleanupScope &scope = cast<EHCleanupScope>(*ehStack.begin());
+ assert(scope.getFixupDepth() <= ehStack.getNumBranchFixups());
// Remember activation information.
bool isActive = scope.isActive();
- assert(!cir::MissingFeatures::ehCleanupBranchFixups());
+ // - whether there are branch fix-ups through this cleanup
+ unsigned fixupDepth = scope.getFixupDepth();
+ bool hasFixups = ehStack.getNumBranchFixups() != fixupDepth;
// - whether there's a fallthrough
mlir::Block *fallthroughSource = builder.getInsertionBlock();
bool hasFallthrough = fallthroughSource != nullptr && isActive;
- bool requiresNormalCleanup = scope.isNormalCleanup() && hasFallthrough;
+ bool requiresNormalCleanup =
+ scope.isNormalCleanup() && (hasFixups || hasFallthrough);
// If we don't need the cleanup at all, we're done.
assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup());
@@ -168,9 +271,119 @@ void CIRGenFunction::popCleanupBlock() {
assert(!cir::MissingFeatures::ehCleanupFlags());
- ehStack.popCleanup();
- scope.markEmitted();
- emitCleanup(*this, cleanup);
+ // If we have a fallthrough and no other need for the cleanup,
+ // emit it directly.
+ if (hasFallthrough && !hasFixups) {
+ assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup());
+ ehStack.popCleanup();
+ scope.markEmitted();
+ emitCleanup(*this, cleanup);
+ } else {
+ // Otherwise, the best approach is to thread everything through
+ // the cleanup block and then try to clean up after ourselves.
+
+ // Force the entry block to exist.
+ mlir::Block *normalEntry = createNormalEntry(*this, scope);
+
+ // I. Set up the fallthrough edge in.
+ mlir::OpBuilder::InsertPoint savedInactiveFallthroughIP;
+
+ // If there's a fallthrough, we need to store the cleanup
+ // destination index. For fall-throughs this is always zero.
+ if (hasFallthrough) {
+ assert(!cir::MissingFeatures::ehCleanupHasPrebranchedFallthrough());
+
+ } else if (fallthroughSource) {
+ // Otherwise, save and clear the IP if we don't have fallthrough
+ // because the cleanup is inactive.
+ assert(!isActive && "source without fallthrough for active cleanup");
+ savedInactiveFallthroughIP = builder.saveInsertionPoint();
+ }
+
+ // II. Emit the entry block. This implicitly branches to it if
+ // we have fallthrough. All the fixups and existing branches
+ // should already be branched to it.
+ builder.setInsertionPointToEnd(normalEntry);
+
+ // intercept normal cleanup to mark SEH scope end
+ assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup());
+
+ // III. Figure out where we're going and build the cleanup
+ // epilogue.
+ bool hasEnclosingCleanups =
+ (scope.getEnclosingNormalCleanup() != ehStack.stable_end());
+
+ // Compute the branch-through dest if we need it:
+ // - if there are branch-throughs threaded through the scope
+ // - if fall-through is a branch-through
+ // - if there are fixups that will be optimistically forwarded
+ // to the enclosing cleanup
+ assert(!cir::MissingFeatures::cleanupBranchThrough());
+ if (hasFixups && hasEnclosingCleanups)
+ cgm.errorNYI("cleanup branch-through dest");
+
+ mlir::Block *fallthroughDest = nullptr;
+
+ // If there's exactly one branch-after and no other threads,
+ // we can route it without a switch.
+ // Skip for SEH, since ExitSwitch is used to generate code to indicate
+ // abnormal termination. (SEH: Except _leave and fall-through at
+ // the end, all other exits in a _try (return/goto/continue/break)
+ // are considered as abnormal terminations, using NormalCleanupDestSlot
+ // to indicate abnormal termination)
+ assert(!cir::MissingFeatures::cleanupBranchThrough());
+ assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup());
+
+ // IV. Pop the cleanup and emit it.
+ scope.markEmitted();
+ ehStack.popCleanup();
+ assert(ehStack.hasNormalCleanups() == hasEnclosingCleanups);
+
+ emitCleanup(*this, cleanup);
+
+ // Append the prepared cleanup prologue from above.
+ assert(!cir::MissingFeatures::cleanupAppendInsts());
+
+ // Optimistically hope that any fixups will continue falling through.
+ if (fixupDepth != ehStack.getNumBranchFixups())
+ cgm.errorNYI("cleanup fixup depth mismatch");
+
+ // V. Set up the fallthrough edge out.
+
+ // Case 1: a fallthrough source exists but doesn't branch to the
+ // cleanup because the cleanup is inactive.
+ if (!hasFallthrough && fallthroughSource) {
+ // Prebranched fallthrough was forwarded earlier.
+ // Non-prebranched fallthrough doesn't need to be forwarded.
+ // Either way, all we need to do is restore the IP we cleared before.
+ assert(!isActive);
+ cgm.errorNYI("cleanup inactive fallthrough");
+
+ // Case 2: a fallthrough source exists and should branch to the
+ // cleanup, but we're not supposed to branch through to the next
+ // cleanup.
+ } else if (hasFallthrough && fallthroughDest) {
+ cgm.errorNYI("cleanup fallthrough destination");
+
+ // Case 3: a fallthrough source exists and should branch to the
+ // cleanup and then through to the next.
+ } else if (hasFallthrough) {
+ // Everything is already set up for this.
+
+ // Case 4: no fallthrough source exists.
+ } else {
+ // FIXME(cir): should we clear insertion point here?
+ }
+
+ // VI. Assorted cleaning.
+
+ // Check whether we can merge NormalEntry into a single predecessor.
+ // This might invalidate (non-IR) pointers to NormalEntry.
+ //
+ // If it did invalidate those pointers, and normalEntry was the same
+ // as NormalExit, go back and patch up the fixups.
+ assert(!cir::MissingFeatures::simplifyCleanupEntry());
+ }
}
/// Pops cleanup blocks until the given savepoint is reached.
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.h b/clang/lib/CIR/CodeGen/CIRGenCleanup.h
index 30f5607..9acf8b1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.h
@@ -20,6 +20,13 @@
namespace clang::CIRGen {
+/// The MS C++ ABI needs a pointer to RTTI data plus some flags to describe the
+/// type of a catch handler, so we use this wrapper.
+struct CatchTypeInfo {
+ mlir::TypedAttr rtti;
+ unsigned flags;
+};
+
/// A protected scope for zero-cost EH handling.
class EHScope {
class CommonBitFields {
@@ -29,6 +36,12 @@ class EHScope {
enum { NumCommonBits = 3 };
protected:
+ class CatchBitFields {
+ friend class EHCatchScope;
+ unsigned : NumCommonBits;
+ unsigned numHandlers : 32 - NumCommonBits;
+ };
+
class CleanupBitFields {
friend class EHCleanupScope;
unsigned : NumCommonBits;
@@ -58,6 +71,7 @@ protected:
union {
CommonBitFields commonBits;
+ CatchBitFields catchBits;
CleanupBitFields cleanupBits;
};
@@ -67,11 +81,88 @@ public:
EHScope(Kind kind) { commonBits.kind = kind; }
Kind getKind() const { return static_cast<Kind>(commonBits.kind); }
+
+ bool mayThrow() const {
+ // Traditional LLVM codegen also checks for `!block->use_empty()`, but
+ // in CIRGen the block content is not important, just used as a way to
+ // signal `hasEHBranches`.
+ assert(!cir::MissingFeatures::ehstackBranches());
+ return false;
+ }
+};
+
+/// A scope which attempts to handle some, possibly all, types of
+/// exceptions.
+///
+/// Objective C \@finally blocks are represented using a cleanup scope
+/// after the catch scope.
+
+class EHCatchScope : public EHScope {
+ // In effect, we have a flexible array member
+ // Handler Handlers[0];
+ // But that's only standard in C99, not C++, so we have to do
+ // annoying pointer arithmetic instead.
+
+public:
+ struct Handler {
+ /// A type info value, or null MLIR attribute for a catch-all
+ CatchTypeInfo type;
+
+ /// The catch handler for this type.
+ mlir::Region *region;
+ };
+
+private:
+ friend class EHScopeStack;
+
+ Handler *getHandlers() { return reinterpret_cast<Handler *>(this + 1); }
+
+public:
+ static size_t getSizeForNumHandlers(unsigned n) {
+ return sizeof(EHCatchScope) + n * sizeof(Handler);
+ }
+
+ EHCatchScope(unsigned numHandlers) : EHScope(Catch) {
+ catchBits.numHandlers = numHandlers;
+ assert(catchBits.numHandlers == numHandlers && "NumHandlers overflow?");
+ }
+
+ unsigned getNumHandlers() const { return catchBits.numHandlers; }
+
+ void setHandler(unsigned i, CatchTypeInfo type, mlir::Region *region) {
+ assert(i < getNumHandlers());
+ getHandlers()[i].type = type;
+ getHandlers()[i].region = region;
+ }
+
+ // Clear all handler blocks.
+ // FIXME: it's better to always call clearHandlerBlocks in DTOR and have a
+ // 'takeHandler' or some such function which removes ownership from the
+ // EHCatchScope object if the handlers should live longer than EHCatchScope.
+ void clearHandlerBlocks() {
+ // The blocks are owned by TryOp, nothing to delete.
+ }
+
+ static bool classof(const EHScope *scope) {
+ return scope->getKind() == Catch;
+ }
};
/// A cleanup scope which generates the cleanup blocks lazily.
class alignas(EHScopeStack::ScopeStackAlignment) EHCleanupScope
: public EHScope {
+ /// The nearest normal cleanup scope enclosing this one.
+ EHScopeStack::stable_iterator enclosingNormal;
+
+ /// The dual entry/exit block along the normal edge. This is lazily
+ /// created if needed before the cleanup is popped.
+ mlir::Block *normalBlock = nullptr;
+
+ /// The number of fixups required by enclosing scopes (not including
+ /// this one). If this is the top cleanup scope, all the fixups
+ /// from this index onwards belong to this scope.
+ unsigned fixupDepth = 0;
+
public:
/// Gets the size required for a lazy cleanup scope with the given
/// cleanup-data requirements.
@@ -83,7 +174,10 @@ public:
return sizeof(EHCleanupScope) + cleanupBits.cleanupSize;
}
- EHCleanupScope(unsigned cleanupSize) : EHScope(EHScope::Cleanup) {
+ EHCleanupScope(unsigned cleanupSize, unsigned fixupDepth,
+ EHScopeStack::stable_iterator enclosingNormal)
+ : EHScope(EHScope::Cleanup), enclosingNormal(enclosingNormal),
+ fixupDepth(fixupDepth) {
// TODO(cir): When exception handling is upstreamed, isNormalCleanup and
// isEHCleanup will be arguments to the constructor.
cleanupBits.isNormalCleanup = true;
@@ -101,11 +195,19 @@ public:
// Objects of EHCleanupScope are not destructed. Use destroy().
~EHCleanupScope() = delete;
+ mlir::Block *getNormalBlock() const { return normalBlock; }
+ void setNormalBlock(mlir::Block *bb) { normalBlock = bb; }
+
bool isNormalCleanup() const { return cleanupBits.isNormalCleanup; }
bool isActive() const { return cleanupBits.isActive; }
void setActive(bool isActive) { cleanupBits.isActive = isActive; }
+ unsigned getFixupDepth() const { return fixupDepth; }
+ EHScopeStack::stable_iterator getEnclosingNormalCleanup() const {
+ return enclosingNormal;
+ }
+
size_t getCleanupSize() const { return cleanupBits.cleanupSize; }
void *getCleanupBuffer() { return this + 1; }
@@ -147,5 +249,13 @@ EHScopeStack::find(stable_iterator savePoint) const {
return iterator(endOfBuffer - savePoint.size);
}
+inline void EHScopeStack::popCatch() {
+ assert(!empty() && "popping exception stack when not empty");
+
+ EHCatchScope &scope = llvm::cast<EHCatchScope>(*begin());
+ assert(!cir::MissingFeatures::innermostEHScope());
+ deallocate(EHCatchScope::getSizeForNumHandlers(scope.getNumHandlers()));
+}
+
} // namespace clang::CIRGen
#endif // CLANG_LIB_CIR_CODEGEN_CIRGENCLEANUP_H
diff --git a/clang/lib/CIR/CodeGen/CIRGenException.cpp b/clang/lib/CIR/CodeGen/CIRGenException.cpp
index f9ff37b..717a3e0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenException.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenException.cpp
@@ -69,6 +69,153 @@ mlir::LogicalResult CIRGenFunction::emitCXXTryStmt(const CXXTryStmt &s) {
if (s.getTryBlock()->body_empty())
return mlir::LogicalResult::success();
- cgm.errorNYI("exitCXXTryStmt: CXXTryStmt with non-empty body");
- return mlir::LogicalResult::success();
+ mlir::Location loc = getLoc(s.getSourceRange());
+ // Create a scope to hold try local storage for catch params.
+
+ mlir::OpBuilder::InsertPoint scopeIP;
+ cir::ScopeOp::create(
+ builder, loc,
+ /*scopeBuilder=*/[&](mlir::OpBuilder &b, mlir::Location loc) {
+ scopeIP = builder.saveInsertionPoint();
+ });
+
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.restoreInsertionPoint(scopeIP);
+ mlir::LogicalResult result = emitCXXTryStmtUnderScope(s);
+ cir::YieldOp::create(builder, loc);
+ return result;
+}
+
+mlir::LogicalResult
+CIRGenFunction::emitCXXTryStmtUnderScope(const CXXTryStmt &s) {
+ const llvm::Triple &t = getTarget().getTriple();
+ // If we encounter a try statement on in an OpenMP target region offloaded to
+ // a GPU, we treat it as a basic block.
+ const bool isTargetDevice =
+ (cgm.getLangOpts().OpenMPIsTargetDevice && (t.isNVPTX() || t.isAMDGCN()));
+ if (isTargetDevice) {
+ cgm.errorNYI(
+ "emitCXXTryStmtUnderScope: OpenMP target region offloaded to GPU");
+ return mlir::success();
+ }
+
+ unsigned numHandlers = s.getNumHandlers();
+ mlir::Location tryLoc = getLoc(s.getBeginLoc());
+ mlir::OpBuilder::InsertPoint beginInsertTryBody;
+
+ bool hasCatchAll = false;
+ for (unsigned i = 0; i != numHandlers; ++i) {
+ hasCatchAll |= s.getHandler(i)->getExceptionDecl() == nullptr;
+ if (hasCatchAll)
+ break;
+ }
+
+ // Create the scope to represent only the C/C++ `try {}` part. However,
+ // don't populate right away. Create regions for the catch handlers,
+ // but don't emit the handler bodies yet. For now, only make sure the
+ // scope returns the exception information.
+ auto tryOp = cir::TryOp::create(
+ builder, tryLoc,
+ /*tryBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ beginInsertTryBody = builder.saveInsertionPoint();
+ },
+ /*handlersBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc,
+ mlir::OperationState &result) {
+ mlir::OpBuilder::InsertionGuard guard(b);
+
+ // We create an extra region for an unwind catch handler in case the
+ // catch-all handler doesn't exists
+ unsigned numRegionsToCreate =
+ hasCatchAll ? numHandlers : numHandlers + 1;
+
+ for (unsigned i = 0; i != numRegionsToCreate; ++i) {
+ mlir::Region *region = result.addRegion();
+ builder.createBlock(region);
+ }
+ });
+
+ // Finally emit the body for try/catch.
+ {
+ mlir::Location loc = tryOp.getLoc();
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.restoreInsertionPoint(beginInsertTryBody);
+ CIRGenFunction::LexicalScope tryScope{*this, loc,
+ builder.getInsertionBlock()};
+
+ tryScope.setAsTry(tryOp);
+
+ // Attach the basic blocks for the catch regions.
+ enterCXXTryStmt(s, tryOp);
+
+ // Emit the body for the `try {}` part.
+ {
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ CIRGenFunction::LexicalScope tryBodyScope{*this, loc,
+ builder.getInsertionBlock()};
+ if (emitStmt(s.getTryBlock(), /*useCurrentScope=*/true).failed())
+ return mlir::failure();
+ }
+
+ // Emit catch clauses.
+ exitCXXTryStmt(s);
+ }
+
+ return mlir::success();
+}
+
+void CIRGenFunction::enterCXXTryStmt(const CXXTryStmt &s, cir::TryOp tryOp,
+ bool isFnTryBlock) {
+ unsigned numHandlers = s.getNumHandlers();
+ EHCatchScope *catchScope = ehStack.pushCatch(numHandlers);
+ for (unsigned i = 0; i != numHandlers; ++i) {
+ const CXXCatchStmt *catchStmt = s.getHandler(i);
+ if (catchStmt->getExceptionDecl()) {
+ cgm.errorNYI("enterCXXTryStmt: CatchStmt with ExceptionDecl");
+ return;
+ }
+
+ // No exception decl indicates '...', a catch-all.
+ mlir::Region *handler = &tryOp.getHandlerRegions()[i];
+ catchScope->setHandler(i, cgm.getCXXABI().getCatchAllTypeInfo(), handler);
+
+ // Under async exceptions, catch(...) needs to catch HW exception too
+ // Mark scope with SehTryBegin as a SEH __try scope
+ if (getLangOpts().EHAsynch) {
+ cgm.errorNYI("enterCXXTryStmt: EHAsynch");
+ return;
+ }
+ }
+}
+
+void CIRGenFunction::exitCXXTryStmt(const CXXTryStmt &s, bool isFnTryBlock) {
+ unsigned numHandlers = s.getNumHandlers();
+ EHCatchScope &catchScope = cast<EHCatchScope>(*ehStack.begin());
+ assert(catchScope.getNumHandlers() == numHandlers);
+ cir::TryOp tryOp = curLexScope->getTry();
+
+ // If the catch was not required, bail out now.
+ if (!catchScope.mayThrow()) {
+ catchScope.clearHandlerBlocks();
+ ehStack.popCatch();
+
+ // Drop all basic block from all catch regions.
+ SmallVector<mlir::Block *> eraseBlocks;
+ for (mlir::Region &handlerRegion : tryOp.getHandlerRegions()) {
+ if (handlerRegion.empty())
+ continue;
+
+ for (mlir::Block &b : handlerRegion.getBlocks())
+ eraseBlocks.push_back(&b);
+ }
+
+ for (mlir::Block *b : eraseBlocks)
+ b->erase();
+
+ tryOp.setHandlerTypesAttr({});
+ return;
+ }
+
+ cgm.errorNYI("exitCXXTryStmt: Required catch");
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 9732c9c..52021fc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1675,7 +1675,25 @@ CIRGenCallee CIRGenFunction::emitDirectCallee(const GlobalDecl &gd) {
// name to make it clear it's not the actual builtin.
auto fn = cast<cir::FuncOp>(curFn);
if (fn.getName() != fdInlineName && onlyHasInlineBuiltinDeclaration(fd)) {
- cgm.errorNYI("Inline only builtin function calls");
+ cir::FuncOp clone =
+ mlir::cast_or_null<cir::FuncOp>(cgm.getGlobalValue(fdInlineName));
+
+ if (!clone) {
+ // Create a forward declaration - the body will be generated in
+ // generateCode when the function definition is processed
+ cir::FuncOp calleeFunc = emitFunctionDeclPointer(cgm, gd);
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPointToStart(cgm.getModule().getBody());
+
+ clone = builder.create<cir::FuncOp>(calleeFunc.getLoc(), fdInlineName,
+ calleeFunc.getFunctionType());
+ clone.setLinkageAttr(cir::GlobalLinkageKindAttr::get(
+ &cgm.getMLIRContext(), cir::GlobalLinkageKind::InternalLinkage));
+ clone.setSymVisibility("private");
+ clone.setInlineKindAttr(cir::InlineAttr::get(
+ &cgm.getMLIRContext(), cir::InlineKind::AlwaysInline));
+ }
+ return CIRGenCallee::forDirect(clone, gd);
}
// Replaceable builtins provide their own implementation of a builtin. If we
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
index 568cbdb..d6d226b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
@@ -280,6 +280,7 @@ public:
void VisitUnaryDeref(UnaryOperator *e) { emitAggLoadOfLValue(e); }
void VisitStringLiteral(StringLiteral *e) { emitAggLoadOfLValue(e); }
void VisitCompoundLiteralExpr(CompoundLiteralExpr *e);
+
void VisitPredefinedExpr(const PredefinedExpr *e) {
cgf.cgm.errorNYI(e->getSourceRange(),
"AggExprEmitter: VisitPredefinedExpr");
@@ -670,7 +671,7 @@ void AggExprEmitter::emitNullInitializationToLValue(mlir::Location loc,
return;
}
- cgf.cgm.errorNYI("emitStoreThroughBitfieldLValue");
+ cgf.emitStoreThroughBitfieldLValue(RValue::get(null), lv);
return;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index b1e9e76..fe9e210 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -306,6 +306,7 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
mlir::cast<cir::IntAttr>(constNumElements).getValue();
unsigned numElementsWidth = count.getBitWidth();
+ bool hasAnyOverflow = false;
// The equivalent code in CodeGen/CGExprCXX.cpp handles these cases as
// overflow, but that should never happen. The size argument is implicitly
@@ -336,11 +337,22 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
// Add in the cookie, and check whether it's overflowed.
if (cookieSize != 0) {
- cgf.cgm.errorNYI(e->getSourceRange(),
- "emitCXXNewAllocSize: array cookie");
+ // Save the current size without a cookie. This shouldn't be
+ // used if there was overflow
+ sizeWithoutCookie = cgf.getBuilder().getConstInt(
+ loc, allocationSize.zextOrTrunc(sizeWidth));
+
+ allocationSize = allocationSize.uadd_ov(cookieSize, overflow);
+ hasAnyOverflow |= overflow;
}
- size = cgf.getBuilder().getConstInt(loc, allocationSize);
+ // On overflow, produce a -1 so operator new will fail
+ if (hasAnyOverflow) {
+ size =
+ cgf.getBuilder().getConstInt(loc, llvm::APInt::getAllOnes(sizeWidth));
+ } else {
+ size = cgf.getBuilder().getConstInt(loc, allocationSize);
+ }
} else {
// TODO: Handle the variable size case
cgf.cgm.errorNYI(e->getSourceRange(),
@@ -390,7 +402,50 @@ void CIRGenFunction::emitNewArrayInitializer(
if (!e->hasInitializer())
return;
- cgm.errorNYI(e->getSourceRange(), "emitNewArrayInitializer");
+ unsigned initListElements = 0;
+
+ const Expr *init = e->getInitializer();
+ const InitListExpr *ile = dyn_cast<InitListExpr>(init);
+ if (ile) {
+ cgm.errorNYI(ile->getSourceRange(), "emitNewArrayInitializer: init list");
+ return;
+ }
+
+ // If all elements have already been initialized, skip any further
+ // initialization.
+ auto constOp = mlir::dyn_cast<cir::ConstantOp>(numElements.getDefiningOp());
+ if (constOp) {
+ auto constIntAttr = mlir::dyn_cast<cir::IntAttr>(constOp.getValue());
+ // Just skip out if the constant count is zero.
+ if (constIntAttr && constIntAttr.getUInt() <= initListElements)
+ return;
+ }
+
+ assert(init && "have trailing elements to initialize but no initializer");
+
+ // If this is a constructor call, try to optimize it out, and failing that
+ // emit a single loop to initialize all remaining elements.
+ if (const CXXConstructExpr *cce = dyn_cast<CXXConstructExpr>(init)) {
+ CXXConstructorDecl *ctor = cce->getConstructor();
+ if (ctor->isTrivial()) {
+ // If new expression did not specify value-initialization, then there
+ // is no initialization.
+ if (!cce->requiresZeroInitialization())
+ return;
+
+ cgm.errorNYI(cce->getSourceRange(),
+ "emitNewArrayInitializer: trivial ctor zero-init");
+ return;
+ }
+
+ cgm.errorNYI(cce->getSourceRange(),
+ "emitNewArrayInitializer: ctor initializer");
+ return;
+ }
+
+ cgm.errorNYI(init->getSourceRange(),
+ "emitNewArrayInitializer: unsupported initializer");
+ return;
}
static void emitNewInitializer(CIRGenFunction &cgf, const CXXNewExpr *e,
@@ -586,9 +641,6 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
// If there is a brace-initializer, cannot allocate fewer elements than inits.
unsigned minElements = 0;
- if (e->isArray() && e->hasInitializer()) {
- cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: array initializer");
- }
mlir::Value numElements = nullptr;
mlir::Value allocSizeWithoutCookie = nullptr;
@@ -667,8 +719,11 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
!e->getOperatorDelete()->isReservedGlobalPlacementOperator())
cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: operator delete");
- if (allocSize != allocSizeWithoutCookie)
- cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: array with cookies");
+ if (allocSize != allocSizeWithoutCookie) {
+ assert(e->isArray());
+ allocation = cgm.getCXXABI().initializeArrayCookie(
+ *this, allocation, numElements, e, allocType);
+ }
mlir::Type elementTy;
if (e->isArray()) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 138082b..33eb748 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -2041,8 +2041,9 @@ mlir::Value ScalarExprEmitter::VisitMemberExpr(MemberExpr *e) {
assert(!cir::MissingFeatures::tryEmitAsConstant());
Expr::EvalResult result;
if (e->EvaluateAsInt(result, cgf.getContext(), Expr::SE_AllowSideEffects)) {
- cgf.cgm.errorNYI(e->getSourceRange(), "Constant interger member expr");
- // Fall through to emit this as a non-constant access.
+ llvm::APSInt value = result.Val.getInt();
+ cgf.emitIgnoredExpr(e->getBase());
+ return builder.getConstInt(cgf.getLoc(e->getExprLoc()), value);
}
return emitLoadOfLValue(e);
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 25a46df..d3c0d9f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -551,6 +551,49 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
const auto funcDecl = cast<FunctionDecl>(gd.getDecl());
curGD = gd;
+ if (funcDecl->isInlineBuiltinDeclaration()) {
+ // When generating code for a builtin with an inline declaration, use a
+ // mangled name to hold the actual body, while keeping an external
+ // declaration in case the function pointer is referenced somewhere.
+ std::string fdInlineName = (cgm.getMangledName(funcDecl) + ".inline").str();
+ cir::FuncOp clone =
+ mlir::cast_or_null<cir::FuncOp>(cgm.getGlobalValue(fdInlineName));
+ if (!clone) {
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPoint(fn);
+ clone = builder.create<cir::FuncOp>(fn.getLoc(), fdInlineName,
+ fn.getFunctionType());
+ clone.setLinkage(cir::GlobalLinkageKind::InternalLinkage);
+ clone.setSymVisibility("private");
+ clone.setInlineKind(cir::InlineKind::AlwaysInline);
+ }
+ fn.setLinkage(cir::GlobalLinkageKind::ExternalLinkage);
+ fn.setSymVisibility("private");
+ fn = clone;
+ } else {
+ // Detect the unusual situation where an inline version is shadowed by a
+ // non-inline version. In that case we should pick the external one
+ // everywhere. That's GCC behavior too.
+ for (const FunctionDecl *pd = funcDecl->getPreviousDecl(); pd;
+ pd = pd->getPreviousDecl()) {
+ if (LLVM_UNLIKELY(pd->isInlineBuiltinDeclaration())) {
+ std::string inlineName = funcDecl->getName().str() + ".inline";
+ if (auto inlineFn = mlir::cast_or_null<cir::FuncOp>(
+ cgm.getGlobalValue(inlineName))) {
+ // Replace all uses of the .inline function with the regular function
+ // FIXME: This performs a linear walk over the module. Introduce some
+ // caching here.
+ if (inlineFn
+ .replaceAllSymbolUses(fn.getSymNameAttr(), cgm.getModule())
+ .failed())
+ llvm_unreachable("Failed to replace inline builtin symbol uses");
+ inlineFn.erase();
+ }
+ break;
+ }
+ }
+ }
+
SourceLocation loc = funcDecl->getLocation();
Stmt *body = funcDecl->getBody();
SourceRange bodyRange =
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 5a71126..e3b9b6a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -60,11 +60,44 @@ private:
/// is where the next operations will be introduced.
CIRGenBuilderTy &builder;
+ /// A jump destination is an abstract label, branching to which may
+ /// require a jump out through normal cleanups.
+ struct JumpDest {
+ JumpDest() = default;
+ JumpDest(mlir::Block *block, EHScopeStack::stable_iterator depth = {},
+ unsigned index = 0)
+ : block(block) {}
+
+ bool isValid() const { return block != nullptr; }
+ mlir::Block *getBlock() const { return block; }
+ EHScopeStack::stable_iterator getScopeDepth() const { return scopeDepth; }
+ unsigned getDestIndex() const { return index; }
+
+ // This should be used cautiously.
+ void setScopeDepth(EHScopeStack::stable_iterator depth) {
+ scopeDepth = depth;
+ }
+
+ private:
+ mlir::Block *block = nullptr;
+ EHScopeStack::stable_iterator scopeDepth;
+ unsigned index;
+ };
+
public:
/// The GlobalDecl for the current function being compiled or the global
/// variable currently being initialized.
clang::GlobalDecl curGD;
+ /// Unified return block.
+ /// In CIR this is a function because each scope might have
+ /// its associated return block.
+ JumpDest returnBlock(mlir::Block *retBlock) {
+ return getJumpDestInCurrentScope(retBlock);
+ }
+
+ unsigned nextCleanupDestIndex = 1;
+
/// The compiler-generated variable that holds the return value.
std::optional<mlir::Value> fnRetAlloca;
@@ -574,6 +607,16 @@ public:
}
};
+ /// The given basic block lies in the current EH scope, but may be a
+ /// target of a potentially scope-crossing jump; get a stable handle
+ /// to which we can perform this jump later.
+ /// CIRGen: this mostly tracks state for figuring out the proper scope
+ /// information, no actual branches are emitted.
+ JumpDest getJumpDestInCurrentScope(mlir::Block *target) {
+ return JumpDest(target, ehStack.getInnermostNormalCleanup(),
+ nextCleanupDestIndex++);
+ }
+
/// Perform the usual unary conversions on the specified expression and
/// compare the result against zero, returning an Int1Ty value.
mlir::Value evaluateExprAsBool(const clang::Expr *e);
@@ -954,6 +997,9 @@ public:
LexicalScope *parentScope = nullptr;
+ // Holds the actual value for ScopeKind::Try
+ cir::TryOp tryOp = nullptr;
+
// Only Regular is used at the moment. Support for other kinds will be
// added as the relevant statements/expressions are upstreamed.
enum Kind {
@@ -1013,6 +1059,10 @@ public:
void setAsGlobalInit() { scopeKind = Kind::GlobalInit; }
void setAsSwitch() { scopeKind = Kind::Switch; }
void setAsTernary() { scopeKind = Kind::Ternary; }
+ void setAsTry(cir::TryOp op) {
+ scopeKind = Kind::Try;
+ tryOp = op;
+ }
// Lazy create cleanup block or return what's available.
mlir::Block *getOrCreateCleanupBlock(mlir::OpBuilder &builder) {
@@ -1022,6 +1072,11 @@ public:
return cleanupBlock;
}
+ cir::TryOp getTry() {
+ assert(isTry());
+ return tryOp;
+ }
+
mlir::Block *getCleanupBlock(mlir::OpBuilder &builder) {
return cleanupBlock;
}
@@ -1209,6 +1264,8 @@ public:
LValue emitBinaryOperatorLValue(const BinaryOperator *e);
+ cir::BrOp emitBranchThroughCleanup(mlir::Location loc, JumpDest dest);
+
mlir::LogicalResult emitBreakStmt(const clang::BreakStmt &s);
RValue emitBuiltinExpr(const clang::GlobalDecl &gd, unsigned builtinID,
@@ -1348,6 +1405,13 @@ public:
mlir::LogicalResult emitCXXTryStmt(const clang::CXXTryStmt &s);
+ mlir::LogicalResult emitCXXTryStmtUnderScope(const clang::CXXTryStmt &s);
+
+ void enterCXXTryStmt(const CXXTryStmt &s, cir::TryOp tryOp,
+ bool isFnTryBlock = false);
+
+ void exitCXXTryStmt(const CXXTryStmt &s, bool isFnTryBlock = false);
+
void emitCtorPrologue(const clang::CXXConstructorDecl *ctor,
clang::CXXCtorType ctorType, FunctionArgList &args);
@@ -1595,6 +1659,10 @@ public:
bool buildingTopLevelCase);
mlir::LogicalResult emitSwitchStmt(const clang::SwitchStmt &s);
+ mlir::Value emitTargetBuiltinExpr(unsigned builtinID,
+ const clang::CallExpr *e,
+ ReturnValueSlot &returnValue);
+
/// Given a value and its clang type, returns the value casted to its memory
/// representation.
/// Note: CIR defers most of the special casting to the final lowering passes
@@ -1633,6 +1701,8 @@ public:
mlir::LogicalResult emitWhileStmt(const clang::WhileStmt &s);
+ mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *e);
+
/// Given an assignment `*lhs = rhs`, emit a test that checks if \p rhs is
/// nonnull, if 1\p LHS is marked _Nonnull.
void emitNullabilityCheck(LValue lhs, mlir::Value rhs,
diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
index c184d4a..e620310 100644
--- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
@@ -135,8 +135,14 @@ public:
cir::PointerType destCIRTy, bool isRefCast,
Address src) override;
- /**************************** RTTI Uniqueness ******************************/
+ Address initializeArrayCookie(CIRGenFunction &cgf, Address newPtr,
+ mlir::Value numElements, const CXXNewExpr *e,
+ QualType elementType) override;
+
protected:
+ CharUnits getArrayCookieSizeImpl(QualType elementType) override;
+
+ /**************************** RTTI Uniqueness ******************************/
/// Returns true if the ABI requires RTTI type_info objects to be unique
/// across a program.
virtual bool shouldRTTIBeUnique() const { return true; }
@@ -2003,3 +2009,70 @@ mlir::Value CIRGenItaniumCXXABI::emitDynamicCast(CIRGenFunction &cgf,
return cgf.getBuilder().createDynCast(loc, src.getPointer(), destCIRTy,
isRefCast, castInfo);
}
+
+/************************** Array allocation cookies **************************/
+
+CharUnits CIRGenItaniumCXXABI::getArrayCookieSizeImpl(QualType elementType) {
+ // The array cookie is a size_t; pad that up to the element alignment.
+ // The cookie is actually right-justified in that space.
+ return std::max(
+ cgm.getSizeSize(),
+ cgm.getASTContext().getPreferredTypeAlignInChars(elementType));
+}
+
+Address CIRGenItaniumCXXABI::initializeArrayCookie(CIRGenFunction &cgf,
+ Address newPtr,
+ mlir::Value numElements,
+ const CXXNewExpr *e,
+ QualType elementType) {
+ assert(requiresArrayCookie(e));
+
+ // TODO: When sanitizer support is implemented, we'll need to
+ // get the address space from `newPtr`.
+ assert(!cir::MissingFeatures::addressSpace());
+ assert(!cir::MissingFeatures::sanitizers());
+
+ ASTContext &ctx = cgm.getASTContext();
+ CharUnits sizeSize = cgf.getSizeSize();
+ mlir::Location loc = cgf.getLoc(e->getSourceRange());
+
+ // The size of the cookie.
+ CharUnits cookieSize =
+ std::max(sizeSize, ctx.getPreferredTypeAlignInChars(elementType));
+ assert(cookieSize == getArrayCookieSizeImpl(elementType));
+
+ cir::PointerType u8PtrTy = cgf.getBuilder().getUInt8PtrTy();
+ mlir::Value baseBytePtr =
+ cgf.getBuilder().createPtrBitcast(newPtr.getPointer(), u8PtrTy);
+
+ // Compute an offset to the cookie.
+ CharUnits cookieOffset = cookieSize - sizeSize;
+ mlir::Value cookiePtrValue = baseBytePtr;
+ if (!cookieOffset.isZero()) {
+ mlir::Value offsetOp = cgf.getBuilder().getSignedInt(
+ loc, cookieOffset.getQuantity(), /*width=*/32);
+ cookiePtrValue =
+ cgf.getBuilder().createPtrStride(loc, cookiePtrValue, offsetOp);
+ }
+
+ CharUnits baseAlignment = newPtr.getAlignment();
+ CharUnits cookiePtrAlignment = baseAlignment.alignmentAtOffset(cookieOffset);
+ Address cookiePtr(cookiePtrValue, u8PtrTy, cookiePtrAlignment);
+
+ // Write the number of elements into the appropriate slot.
+ Address numElementsPtr =
+ cookiePtr.withElementType(cgf.getBuilder(), cgf.SizeTy);
+ cgf.getBuilder().createStore(loc, numElements, numElementsPtr);
+
+ // Finally, compute a pointer to the actual data buffer by skipping
+ // over the cookie completely.
+ mlir::Value dataOffset =
+ cgf.getBuilder().getSignedInt(loc, cookieSize.getQuantity(),
+ /*width=*/32);
+ mlir::Value dataPtr =
+ cgf.getBuilder().createPtrStride(loc, baseBytePtr, dataOffset);
+ mlir::Value finalPtr =
+ cgf.getBuilder().createPtrBitcast(dataPtr, newPtr.getElementType());
+ CharUnits finalAlignment = baseAlignment.alignmentAtOffset(cookieSize);
+ return Address(finalPtr, newPtr.getElementType(), finalAlignment);
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 127f763..6b29373 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -102,7 +102,7 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
// TODO(CIR): Should be updated once TypeSizeInfoAttr is upstreamed
const unsigned sizeTypeSize =
astContext.getTypeSize(astContext.getSignedSizeType());
- SizeAlignInBytes = astContext.toCharUnitsFromBits(sizeTypeSize).getQuantity();
+ SizeSizeInBytes = astContext.toCharUnitsFromBits(sizeTypeSize).getQuantity();
// In CIRGenTypeCache, UIntPtrTy and SizeType are fields of the same union
UIntPtrTy =
cir::IntType::get(&getMLIRContext(), sizeTypeSize, /*isSigned=*/false);
@@ -1917,6 +1917,17 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl,
const Decl *decl = globalDecl.getDecl();
func.setGlobalVisibilityAttr(getGlobalVisibilityAttrFromDecl(decl));
}
+
+ // If we plan on emitting this inline builtin, we can't treat it as a builtin.
+ const auto *fd = cast<FunctionDecl>(globalDecl.getDecl());
+ if (fd->isInlineBuiltinDeclaration()) {
+ const FunctionDecl *fdBody;
+ bool hasBody = fd->hasBody(fdBody);
+ (void)hasBody;
+ assert(hasBody && "Inline builtin declarations should always have an "
+ "available body!");
+ assert(!cir::MissingFeatures::attributeNoBuiltin());
+ }
}
void CIRGenModule::setCIRFunctionAttributesForDefinition(
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index ad8c4d0..f486c46 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -446,54 +446,89 @@ mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) {
mlir::Location loc = getLoc(s.getSourceRange());
const Expr *rv = s.getRetValue();
- if (getContext().getLangOpts().ElideConstructors && s.getNRVOCandidate() &&
- s.getNRVOCandidate()->isNRVOVariable()) {
- assert(!cir::MissingFeatures::openMP());
- assert(!cir::MissingFeatures::nrvo());
- } else if (!rv) {
- // No return expression. Do nothing.
- } else if (rv->getType()->isVoidType()) {
- // Make sure not to return anything, but evaluate the expression
- // for side effects.
- if (rv) {
- emitAnyExpr(rv);
+ RunCleanupsScope cleanupScope(*this);
+ bool createNewScope = false;
+ if (const auto *ewc = dyn_cast_or_null<ExprWithCleanups>(rv)) {
+ rv = ewc->getSubExpr();
+ createNewScope = true;
+ }
+
+ auto handleReturnVal = [&]() {
+ if (getContext().getLangOpts().ElideConstructors && s.getNRVOCandidate() &&
+ s.getNRVOCandidate()->isNRVOVariable()) {
+ assert(!cir::MissingFeatures::openMP());
+ assert(!cir::MissingFeatures::nrvo());
+ } else if (!rv) {
+ // No return expression. Do nothing.
+ } else if (rv->getType()->isVoidType()) {
+ // Make sure not to return anything, but evaluate the expression
+ // for side effects.
+ if (rv) {
+ emitAnyExpr(rv);
+ }
+ } else if (cast<FunctionDecl>(curGD.getDecl())
+ ->getReturnType()
+ ->isReferenceType()) {
+ // If this function returns a reference, take the address of the
+ // expression rather than the value.
+ RValue result = emitReferenceBindingToExpr(rv);
+ builder.CIRBaseBuilderTy::createStore(loc, result.getValue(),
+ *fnRetAlloca);
+ } else {
+ mlir::Value value = nullptr;
+ switch (CIRGenFunction::getEvaluationKind(rv->getType())) {
+ case cir::TEK_Scalar:
+ value = emitScalarExpr(rv);
+ if (value) { // Change this to an assert once emitScalarExpr is complete
+ builder.CIRBaseBuilderTy::createStore(loc, value, *fnRetAlloca);
+ }
+ break;
+ case cir::TEK_Complex:
+ emitComplexExprIntoLValue(rv,
+ makeAddrLValue(returnValue, rv->getType()),
+ /*isInit=*/true);
+ break;
+ case cir::TEK_Aggregate:
+ assert(!cir::MissingFeatures::aggValueSlotGC());
+ emitAggExpr(rv, AggValueSlot::forAddr(returnValue, Qualifiers(),
+ AggValueSlot::IsDestructed,
+ AggValueSlot::IsNotAliased,
+ getOverlapForReturnValue()));
+ break;
+ }
}
- } else if (cast<FunctionDecl>(curGD.getDecl())
- ->getReturnType()
- ->isReferenceType()) {
- // If this function returns a reference, take the address of the
- // expression rather than the value.
- RValue result = emitReferenceBindingToExpr(rv);
- builder.CIRBaseBuilderTy::createStore(loc, result.getValue(), *fnRetAlloca);
+ };
+
+ if (!createNewScope) {
+ handleReturnVal();
} else {
- mlir::Value value = nullptr;
- switch (CIRGenFunction::getEvaluationKind(rv->getType())) {
- case cir::TEK_Scalar:
- value = emitScalarExpr(rv);
- if (value) { // Change this to an assert once emitScalarExpr is complete
- builder.CIRBaseBuilderTy::createStore(loc, value, *fnRetAlloca);
- }
- break;
- case cir::TEK_Complex:
- emitComplexExprIntoLValue(rv, makeAddrLValue(returnValue, rv->getType()),
- /*isInit=*/true);
- break;
- case cir::TEK_Aggregate:
- assert(!cir::MissingFeatures::aggValueSlotGC());
- emitAggExpr(rv, AggValueSlot::forAddr(returnValue, Qualifiers(),
- AggValueSlot::IsDestructed,
- AggValueSlot::IsNotAliased,
- getOverlapForReturnValue()));
- break;
+ mlir::Location scopeLoc =
+ getLoc(rv ? rv->getSourceRange() : s.getSourceRange());
+ // First create cir.scope and later emit it's body. Otherwise all CIRGen
+ // dispatched by `handleReturnVal()` might needs to manipulate blocks and
+ // look into parents, which are all unlinked.
+ mlir::OpBuilder::InsertPoint scopeBody;
+ cir::ScopeOp::create(builder, scopeLoc, /*scopeBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ scopeBody = b.saveInsertionPoint();
+ });
+ {
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.restoreInsertionPoint(scopeBody);
+ CIRGenFunction::LexicalScope lexScope{*this, scopeLoc,
+ builder.getInsertionBlock()};
+ handleReturnVal();
}
}
+ cleanupScope.forceCleanup();
+
+ // In CIR we might have returns in different scopes.
+ // FIXME(cir): cleanup code is handling actual return emission, the logic
+ // should try to match traditional codegen more closely (to the extent which
+ // is possible).
auto *retBlock = curLexScope->getOrCreateRetBlock(*this, loc);
- // This should emit a branch through the cleanup block if one exists.
- builder.create<cir::BrOp>(loc, retBlock);
- assert(!cir::MissingFeatures::emitBranchThroughCleanup());
- if (ehStack.stable_begin() != currentCleanupStackDepth)
- cgm.errorNYI(s.getSourceRange(), "return with cleanup stack");
+ emitBranchThroughCleanup(loc, returnBlock(retBlock));
// Insert the new block to continue codegen after branch to ret block.
builder.createBlock(builder.getBlock()->getParent());
@@ -1063,5 +1098,5 @@ void CIRGenFunction::emitReturnOfRValue(mlir::Location loc, RValue rv,
assert(!cir::MissingFeatures::emitBranchThroughCleanup());
builder.create<cir::BrOp>(loc, retBlock);
if (ehStack.stable_begin() != currentCleanupStackDepth)
- cgm.errorNYI(loc, "return with cleanup stack");
+ cgm.errorNYI(loc, "return of r-value with cleanup stack");
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypeCache.h b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h
index b5612d9..ff5842c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypeCache.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h
@@ -74,11 +74,17 @@ struct CIRGenTypeCache {
unsigned char PointerSizeInBytes;
};
- /// The alignment of size_t.
- unsigned char SizeAlignInBytes;
+ /// The size and alignment of size_t.
+ union {
+ unsigned char SizeSizeInBytes; // sizeof(size_t)
+ unsigned char SizeAlignInBytes;
+ };
cir::TargetAddressSpaceAttr cirAllocaAddressSpace;
+ clang::CharUnits getSizeSize() const {
+ return clang::CharUnits::fromQuantity(SizeSizeInBytes);
+ }
clang::CharUnits getSizeAlign() const {
return clang::CharUnits::fromQuantity(SizeAlignInBytes);
}
diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt
index 36db4bd..7c31bea 100644
--- a/clang/lib/CIR/CodeGen/CMakeLists.txt
+++ b/clang/lib/CIR/CodeGen/CMakeLists.txt
@@ -11,13 +11,14 @@ add_clang_library(clangCIR
CIRGenAsm.cpp
CIRGenAtomic.cpp
CIRGenBuilder.cpp
+ CIRGenBuiltin.cpp
+ CIRGenBuiltinX86.cpp
CIRGenCall.cpp
CIRGenClass.cpp
CIRGenCleanup.cpp
CIRGenCoroutine.cpp
CIRGenCXX.cpp
CIRGenCXXABI.cpp
- CIRGenBuiltin.cpp
CIRGenDecl.cpp
CIRGenDeclCXX.cpp
CIRGenDeclOpenACC.cpp
diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h
index 67a72f5..4198c23 100644
--- a/clang/lib/CIR/CodeGen/EHScopeStack.h
+++ b/clang/lib/CIR/CodeGen/EHScopeStack.h
@@ -18,12 +18,38 @@
#ifndef CLANG_LIB_CIR_CODEGEN_EHSCOPESTACK_H
#define CLANG_LIB_CIR_CODEGEN_EHSCOPESTACK_H
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "llvm/ADT/SmallVector.h"
namespace clang::CIRGen {
class CIRGenFunction;
+/// A branch fixup. These are required when emitting a goto to a
+/// label which hasn't been emitted yet. The goto is optimistically
+/// emitted as a branch to the basic block for the label, and (if it
+/// occurs in a scope with non-trivial cleanups) a fixup is added to
+/// the innermost cleanup. When a (normal) cleanup is popped, any
+/// unresolved fixups in that scope are threaded through the cleanup.
+struct BranchFixup {
+ /// The block containing the terminator which needs to be modified
+ /// into a switch if this fixup is resolved into the current scope.
+ /// If null, initialBranch points directly to the destination.
+ mlir::Block *optimisticBranchBlock = nullptr;
+
+ /// The ultimate destination of the branch.
+ ///
+ /// This can be set to null to indicate that this fixup was
+ /// successfully resolved.
+ mlir::Block *destination = nullptr;
+
+ /// The destination index value.
+ unsigned destinationIndex = 0;
+
+ /// The initial branch of the fixup.
+ cir::BrOp initialBranch = {};
+};
+
enum CleanupKind : unsigned {
/// Denotes a cleanup that should run when a scope is exited using exceptional
/// control flow (a throw statement leading to stack unwinding, ).
@@ -126,9 +152,31 @@ private:
/// The first valid entry in the buffer.
char *startOfData = nullptr;
+ /// The innermost normal cleanup on the stack.
+ stable_iterator innermostNormalCleanup = stable_end();
+
/// The CGF this Stack belong to
CIRGenFunction *cgf = nullptr;
+ /// The current set of branch fixups. A branch fixup is a jump to
+ /// an as-yet unemitted label, i.e. a label for which we don't yet
+ /// know the EH stack depth. Whenever we pop a cleanup, we have
+ /// to thread all the current branch fixups through it.
+ ///
+ /// Fixups are recorded as the Use of the respective branch or
+ /// switch statement. The use points to the final destination.
+ /// When popping out of a cleanup, these uses are threaded through
+ /// the cleanup and adjusted to point to the new cleanup.
+ ///
+ /// Note that branches are allowed to jump into protected scopes
+ /// in certain situations; e.g. the following code is legal:
+ /// struct A { ~A(); }; // trivial ctor, non-trivial dtor
+ /// goto foo;
+ /// A a;
+ /// foo:
+ /// bar();
+ llvm::SmallVector<BranchFixup> branchFixups;
+
// This class uses a custom allocator for maximum efficiency because cleanups
// are allocated and freed very frequently. It's basically a bump pointer
// allocator, but we can't use LLVM's BumpPtrAllocator because we use offsets
@@ -155,9 +203,29 @@ public:
/// Pops a cleanup scope off the stack. This is private to CIRGenCleanup.cpp.
void popCleanup();
+ /// Push a set of catch handlers on the stack. The catch is
+ /// uninitialized and will need to have the given number of handlers
+ /// set on it.
+ class EHCatchScope *pushCatch(unsigned numHandlers);
+
+ /// Pops a catch scope off the stack. This is private to CIRGenException.cpp.
+ void popCatch();
+
/// Determines whether the exception-scopes stack is empty.
bool empty() const { return startOfData == endOfBuffer; }
+ /// Determines whether there are any normal cleanups on the stack.
+ bool hasNormalCleanups() const {
+ return innermostNormalCleanup != stable_end();
+ }
+
+ /// Returns the innermost normal cleanup on the stack, or
+ /// stable_end() if there are no normal cleanups.
+ stable_iterator getInnermostNormalCleanup() const {
+ return innermostNormalCleanup;
+ }
+ stable_iterator getInnermostActiveNormalCleanup() const;
+
/// An unstable reference to a scope-stack depth. Invalidated by
/// pushes but not pops.
class iterator;
@@ -172,12 +240,30 @@ public:
return stable_iterator(endOfBuffer - startOfData);
}
+ /// Create a stable reference to the bottom of the EH stack.
+ static stable_iterator stable_end() { return stable_iterator(0); }
+
/// Turn a stable reference to a scope depth into a unstable pointer
/// to the EH stack.
iterator find(stable_iterator savePoint) const;
- /// Create a stable reference to the bottom of the EH stack.
- static stable_iterator stable_end() { return stable_iterator(0); }
+ /// Add a branch fixup to the current cleanup scope.
+ BranchFixup &addBranchFixup() {
+ assert(hasNormalCleanups() && "adding fixup in scope without cleanups");
+ branchFixups.push_back(BranchFixup());
+ return branchFixups.back();
+ }
+
+ unsigned getNumBranchFixups() const { return branchFixups.size(); }
+ BranchFixup &getBranchFixup(unsigned i) {
+ assert(i < getNumBranchFixups());
+ return branchFixups[i];
+ }
+
+ /// Pops lazily-removed fixups from the end of the list. This
+ /// should only be called by procedures which have just popped a
+ /// cleanup or resolved one or more fixups.
+ void popNullFixups();
};
} // namespace clang::CIRGen
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index ed606b7..fa180f5 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -2941,6 +2941,21 @@ mlir::LogicalResult cir::ThrowOp::verify() {
}
//===----------------------------------------------------------------------===//
+// AtomicFetchOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult cir::AtomicFetchOp::verify() {
+ if (getBinop() != cir::AtomicFetchKind::Add &&
+ getBinop() != cir::AtomicFetchKind::Sub &&
+ getBinop() != cir::AtomicFetchKind::Max &&
+ getBinop() != cir::AtomicFetchKind::Min &&
+ !mlir::isa<cir::IntType>(getVal().getType()))
+ return emitError("only atomic add, sub, max, and min operation could "
+ "operate on floating-point values");
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
// TypeInfoAttr
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
index 8589a2e..46bd186 100644
--- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
@@ -551,10 +551,100 @@ public:
}
};
+class CIRTryOpFlattening : public mlir::OpRewritePattern<cir::TryOp> {
+public:
+ using OpRewritePattern<cir::TryOp>::OpRewritePattern;
+
+ mlir::Block *buildTryBody(cir::TryOp tryOp,
+ mlir::PatternRewriter &rewriter) const {
+ // Split the current block before the TryOp to create the inlining
+ // point.
+ mlir::Block *beforeTryScopeBlock = rewriter.getInsertionBlock();
+ mlir::Block *afterTry =
+ rewriter.splitBlock(beforeTryScopeBlock, rewriter.getInsertionPoint());
+
+ // Inline body region.
+ mlir::Block *beforeBody = &tryOp.getTryRegion().front();
+ rewriter.inlineRegionBefore(tryOp.getTryRegion(), afterTry);
+
+ // Branch into the body of the region.
+ rewriter.setInsertionPointToEnd(beforeTryScopeBlock);
+ cir::BrOp::create(rewriter, tryOp.getLoc(), mlir::ValueRange(), beforeBody);
+ return afterTry;
+ }
+
+ void buildHandlers(cir::TryOp tryOp, mlir::PatternRewriter &rewriter,
+ mlir::Block *afterBody, mlir::Block *afterTry,
+ SmallVectorImpl<cir::CallOp> &callsToRewrite,
+ SmallVectorImpl<mlir::Block *> &landingPads) const {
+ // Replace the tryOp return with a branch that jumps out of the body.
+ rewriter.setInsertionPointToEnd(afterBody);
+
+ mlir::Block *beforeCatch = rewriter.getInsertionBlock();
+ rewriter.setInsertionPointToEnd(beforeCatch);
+
+ // Check if the terminator is a YieldOp because there could be another
+ // terminator, e.g. unreachable
+ if (auto tryBodyYield = dyn_cast<cir::YieldOp>(afterBody->getTerminator()))
+ rewriter.replaceOpWithNewOp<cir::BrOp>(tryBodyYield, afterTry);
+
+ mlir::ArrayAttr handlers = tryOp.getHandlerTypesAttr();
+ if (!handlers || handlers.empty())
+ return;
+
+ llvm_unreachable("TryOpFlattening buildHandlers with CallsOp is NYI");
+ }
+
+ mlir::LogicalResult
+ matchAndRewrite(cir::TryOp tryOp,
+ mlir::PatternRewriter &rewriter) const override {
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ mlir::Block *afterBody = &tryOp.getTryRegion().back();
+
+ // Grab the collection of `cir.call exception`s to rewrite to
+ // `cir.try_call`.
+ llvm::SmallVector<cir::CallOp, 4> callsToRewrite;
+ tryOp.getTryRegion().walk([&](CallOp op) {
+ // Only grab calls within immediate closest TryOp scope.
+ if (op->getParentOfType<cir::TryOp>() != tryOp)
+ return;
+ assert(!cir::MissingFeatures::opCallExceptionAttr());
+ callsToRewrite.push_back(op);
+ });
+
+ if (!callsToRewrite.empty())
+ llvm_unreachable(
+ "TryOpFlattening with try block that contains CallOps is NYI");
+
+ // Build try body.
+ mlir::Block *afterTry = buildTryBody(tryOp, rewriter);
+
+ // Build handlers.
+ llvm::SmallVector<mlir::Block *, 4> landingPads;
+ buildHandlers(tryOp, rewriter, afterBody, afterTry, callsToRewrite,
+ landingPads);
+
+ rewriter.eraseOp(tryOp);
+
+ assert((landingPads.size() == callsToRewrite.size()) &&
+ "expected matching number of entries");
+
+ // Quick block cleanup: no indirection to the post try block.
+ auto brOp = dyn_cast<cir::BrOp>(afterTry->getTerminator());
+ if (brOp && brOp.getDest()->hasNoPredecessors()) {
+ mlir::Block *srcBlock = brOp.getDest();
+ rewriter.eraseOp(brOp);
+ rewriter.mergeBlocks(srcBlock, afterTry);
+ }
+
+ return mlir::success();
+ }
+};
+
void populateFlattenCFGPatterns(RewritePatternSet &patterns) {
patterns
.add<CIRIfFlattening, CIRLoopOpInterfaceFlattening, CIRScopeOpFlattening,
- CIRSwitchOpFlattening, CIRTernaryOpFlattening>(
+ CIRSwitchOpFlattening, CIRTernaryOpFlattening, CIRTryOpFlattening>(
patterns.getContext());
}
@@ -568,7 +658,7 @@ void CIRFlattenCFGPass::runOnOperation() {
assert(!cir::MissingFeatures::ifOp());
assert(!cir::MissingFeatures::switchOp());
assert(!cir::MissingFeatures::tryOp());
- if (isa<IfOp, ScopeOp, SwitchOp, LoopOpInterface, TernaryOp>(op))
+ if (isa<IfOp, ScopeOp, SwitchOp, LoopOpInterface, TernaryOp, TryOp>(op))
ops.push_back(op);
});
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 3fc94eb..bb75f2d 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -770,6 +770,147 @@ mlir::LogicalResult CIRToLLVMAtomicClearOpLowering::matchAndRewrite(
return mlir::success();
}
+static mlir::LLVM::AtomicBinOp
+getLLVMAtomicBinOp(cir::AtomicFetchKind k, bool isInt, bool isSignedInt) {
+ switch (k) {
+ case cir::AtomicFetchKind::Add:
+ return isInt ? mlir::LLVM::AtomicBinOp::add : mlir::LLVM::AtomicBinOp::fadd;
+ case cir::AtomicFetchKind::Sub:
+ return isInt ? mlir::LLVM::AtomicBinOp::sub : mlir::LLVM::AtomicBinOp::fsub;
+ case cir::AtomicFetchKind::And:
+ return mlir::LLVM::AtomicBinOp::_and;
+ case cir::AtomicFetchKind::Xor:
+ return mlir::LLVM::AtomicBinOp::_xor;
+ case cir::AtomicFetchKind::Or:
+ return mlir::LLVM::AtomicBinOp::_or;
+ case cir::AtomicFetchKind::Nand:
+ return mlir::LLVM::AtomicBinOp::nand;
+ case cir::AtomicFetchKind::Max: {
+ if (!isInt)
+ return mlir::LLVM::AtomicBinOp::fmax;
+ return isSignedInt ? mlir::LLVM::AtomicBinOp::max
+ : mlir::LLVM::AtomicBinOp::umax;
+ }
+ case cir::AtomicFetchKind::Min: {
+ if (!isInt)
+ return mlir::LLVM::AtomicBinOp::fmin;
+ return isSignedInt ? mlir::LLVM::AtomicBinOp::min
+ : mlir::LLVM::AtomicBinOp::umin;
+ }
+ }
+ llvm_unreachable("Unknown atomic fetch opcode");
+}
+
+static llvm::StringLiteral getLLVMBinop(cir::AtomicFetchKind k, bool isInt) {
+ switch (k) {
+ case cir::AtomicFetchKind::Add:
+ return isInt ? mlir::LLVM::AddOp::getOperationName()
+ : mlir::LLVM::FAddOp::getOperationName();
+ case cir::AtomicFetchKind::Sub:
+ return isInt ? mlir::LLVM::SubOp::getOperationName()
+ : mlir::LLVM::FSubOp::getOperationName();
+ case cir::AtomicFetchKind::And:
+ return mlir::LLVM::AndOp::getOperationName();
+ case cir::AtomicFetchKind::Xor:
+ return mlir::LLVM::XOrOp::getOperationName();
+ case cir::AtomicFetchKind::Or:
+ return mlir::LLVM::OrOp::getOperationName();
+ case cir::AtomicFetchKind::Nand:
+ // There's no nand binop in LLVM, this is later fixed with a not.
+ return mlir::LLVM::AndOp::getOperationName();
+ case cir::AtomicFetchKind::Max:
+ case cir::AtomicFetchKind::Min:
+ llvm_unreachable("handled in buildMinMaxPostOp");
+ }
+ llvm_unreachable("Unknown atomic fetch opcode");
+}
+
+mlir::Value CIRToLLVMAtomicFetchOpLowering::buildPostOp(
+ cir::AtomicFetchOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter, mlir::Value rmwVal,
+ bool isInt) const {
+ SmallVector<mlir::Value> atomicOperands = {rmwVal, adaptor.getVal()};
+ SmallVector<mlir::Type> atomicResTys = {rmwVal.getType()};
+ return rewriter
+ .create(op.getLoc(),
+ rewriter.getStringAttr(getLLVMBinop(op.getBinop(), isInt)),
+ atomicOperands, atomicResTys, {})
+ ->getResult(0);
+}
+
+mlir::Value CIRToLLVMAtomicFetchOpLowering::buildMinMaxPostOp(
+ cir::AtomicFetchOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter, mlir::Value rmwVal, bool isInt,
+ bool isSigned) const {
+ mlir::Location loc = op.getLoc();
+
+ if (!isInt) {
+ if (op.getBinop() == cir::AtomicFetchKind::Max)
+ return mlir::LLVM::MaxNumOp::create(rewriter, loc, rmwVal,
+ adaptor.getVal());
+ return mlir::LLVM::MinNumOp::create(rewriter, loc, rmwVal,
+ adaptor.getVal());
+ }
+
+ mlir::LLVM::ICmpPredicate pred;
+ if (op.getBinop() == cir::AtomicFetchKind::Max) {
+ pred = isSigned ? mlir::LLVM::ICmpPredicate::sgt
+ : mlir::LLVM::ICmpPredicate::ugt;
+ } else { // Min
+ pred = isSigned ? mlir::LLVM::ICmpPredicate::slt
+ : mlir::LLVM::ICmpPredicate::ult;
+ }
+ mlir::Value cmp = mlir::LLVM::ICmpOp::create(
+ rewriter, loc,
+ mlir::LLVM::ICmpPredicateAttr::get(rewriter.getContext(), pred), rmwVal,
+ adaptor.getVal());
+ return mlir::LLVM::SelectOp::create(rewriter, loc, cmp, rmwVal,
+ adaptor.getVal());
+}
+
+mlir::LogicalResult CIRToLLVMAtomicFetchOpLowering::matchAndRewrite(
+ cir::AtomicFetchOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ bool isInt = false;
+ bool isSignedInt = false;
+ if (auto intTy = mlir::dyn_cast<cir::IntType>(op.getVal().getType())) {
+ isInt = true;
+ isSignedInt = intTy.isSigned();
+ } else if (mlir::isa<cir::SingleType, cir::DoubleType>(
+ op.getVal().getType())) {
+ isInt = false;
+ } else {
+ return op.emitError() << "Unsupported type: " << op.getVal().getType();
+ }
+
+ mlir::LLVM::AtomicOrdering llvmOrder = getLLVMMemOrder(op.getMemOrder());
+ mlir::LLVM::AtomicBinOp llvmBinOp =
+ getLLVMAtomicBinOp(op.getBinop(), isInt, isSignedInt);
+ auto rmwVal = mlir::LLVM::AtomicRMWOp::create(rewriter, op.getLoc(),
+ llvmBinOp, adaptor.getPtr(),
+ adaptor.getVal(), llvmOrder);
+
+ mlir::Value result = rmwVal.getResult();
+ if (!op.getFetchFirst()) {
+ if (op.getBinop() == cir::AtomicFetchKind::Max ||
+ op.getBinop() == cir::AtomicFetchKind::Min)
+ result = buildMinMaxPostOp(op, adaptor, rewriter, rmwVal.getRes(), isInt,
+ isSignedInt);
+ else
+ result = buildPostOp(op, adaptor, rewriter, rmwVal.getRes(), isInt);
+
+ // Compensate lack of nand binop in LLVM IR.
+ if (op.getBinop() == cir::AtomicFetchKind::Nand) {
+ auto negOne = mlir::LLVM::ConstantOp::create(rewriter, op.getLoc(),
+ result.getType(), -1);
+ result = mlir::LLVM::XOrOp::create(rewriter, op.getLoc(), result, negOne);
+ }
+ }
+
+ rewriter.replaceOp(op, result);
+ return mlir::success();
+}
+
mlir::LogicalResult CIRToLLVMBitClrsbOpLowering::matchAndRewrite(
cir::BitClrsbOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index 954ecab..61beb04 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -290,6 +290,8 @@ void arm::setArchNameInTriple(const Driver &D, const ArgList &Args,
// Thumb2 is the default for V7 on Darwin.
(llvm::ARM::parseArchVersion(Suffix) == 7 &&
Triple.isOSBinFormatMachO()) ||
+ // Thumb2 is the default for Fuchsia.
+ Triple.isOSFuchsia() ||
// FIXME: this is invalid for WindowsCE
Triple.isOSWindows();
@@ -452,6 +454,9 @@ arm::FloatABI arm::getDefaultFloatABI(const llvm::Triple &Triple) {
case llvm::Triple::OpenBSD:
return FloatABI::SoftFP;
+ case llvm::Triple::Fuchsia:
+ return FloatABI::Hard;
+
default:
if (Triple.isOHOSFamily())
return FloatABI::Soft;
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 37c10c6..e5abf83 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -798,9 +798,11 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
}
if (!DryRun) {
+ const bool ContinuePPDirective =
+ State.Line->InMacroBody && Current.isNot(TT_LineComment);
Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces,
State.Column + Spaces + PPColumnCorrection,
- /*IsAligned=*/false, State.Line->InMacroBody);
+ /*IsAligned=*/false, ContinuePPDirective);
}
// If "BreakBeforeInheritanceComma" mode, don't break within the inheritance
@@ -1176,10 +1178,11 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
// about removing empty lines on closing blocks. Special case them here.
MaxEmptyLinesToKeep = 1;
}
- unsigned Newlines =
+ const unsigned Newlines =
std::max(1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep));
- bool ContinuePPDirective =
- State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
+ const bool ContinuePPDirective = State.Line->InPPDirective &&
+ State.Line->Type != LT_ImportStatement &&
+ Current.isNot(TT_LineComment);
Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
CurrentState.IsAligned, ContinuePPDirective);
}
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 39fa25f..215ac18 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -2214,9 +2214,9 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
else
PD << "expression";
- if (Diag(Loc, PD, FD)
- << false /*show bit size*/ << 0 << Ty << false /*return*/
- << TI.getTriple().str()) {
+ if (Diag(Loc, PD) << false /*show bit size*/ << 0 << Ty
+ << false /*return*/
+ << TI.getTriple().str()) {
if (D)
D->setInvalidDecl();
}
@@ -2233,9 +2233,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
else
PD << "expression";
- if (Diag(Loc, PD, FD)
- << false /*show bit size*/ << 0 << Ty << true /*return*/
- << TI.getTriple().str()) {
+ if (Diag(Loc, PD) << false /*show bit size*/ << 0 << Ty << true /*return*/
+ << TI.getTriple().str()) {
if (D)
D->setInvalidDecl();
}
diff --git a/clang/lib/Sema/SemaBase.cpp b/clang/lib/Sema/SemaBase.cpp
index 9b677f4..bf32491 100644
--- a/clang/lib/Sema/SemaBase.cpp
+++ b/clang/lib/Sema/SemaBase.cpp
@@ -58,13 +58,13 @@ SemaBase::SemaDiagnosticBuilder::getDeviceDeferredDiags() const {
return S.DeviceDeferredDiags;
}
-Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, unsigned DiagID,
- bool DeferHint) {
+Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc,
+ unsigned DiagID) {
bool IsError =
getDiagnostics().getDiagnosticIDs()->isDefaultMappingAsError(DiagID);
bool ShouldDefer = getLangOpts().CUDA && getLangOpts().GPUDeferDiag &&
DiagnosticIDs::isDeferrable(DiagID) &&
- (DeferHint || SemaRef.DeferDiags || !IsError);
+ (SemaRef.DeferDiags || !IsError);
auto SetIsLastErrorImmediate = [&](bool Flag) {
if (IsError)
SemaRef.IsLastErrorImmediate = Flag;
@@ -83,16 +83,13 @@ Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, unsigned DiagID,
}
Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc,
- const PartialDiagnostic &PD,
- bool DeferHint) {
- return Diag(Loc, PD.getDiagID(), DeferHint) << PD;
+ const PartialDiagnostic &PD) {
+ return Diag(Loc, PD.getDiagID()) << PD;
}
SemaBase::SemaDiagnosticBuilder SemaBase::DiagCompat(SourceLocation Loc,
- unsigned CompatDiagId,
- bool DeferHint) {
+ unsigned CompatDiagId) {
return Diag(Loc,
- DiagnosticIDs::getCXXCompatDiagId(getLangOpts(), CompatDiagId),
- DeferHint);
+ DiagnosticIDs::getCXXCompatDiagId(getLangOpts(), CompatDiagId));
}
} // namespace clang
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 7da09e8..1f25111 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -13208,7 +13208,10 @@ void OverloadCandidateSet::NoteCandidates(
auto Cands = CompleteCandidates(S, OCD, Args, OpLoc, Filter);
- S.Diag(PD.first, PD.second, shouldDeferDiags(S, Args, OpLoc));
+ {
+ Sema::DeferDiagsRAII RAII{S, shouldDeferDiags(S, Args, OpLoc)};
+ S.Diag(PD.first, PD.second);
+ }
// In WebAssembly we don't want to emit further diagnostics if a table is
// passed as an argument to a function.
@@ -13271,10 +13274,10 @@ void OverloadCandidateSet::NoteCandidates(Sema &S, ArrayRef<Expr *> Args,
// inform the future value of S.Diags.getNumOverloadCandidatesToShow().
S.Diags.overloadCandidatesShown(CandsShown);
- if (I != E)
- S.Diag(OpLoc, diag::note_ovl_too_many_candidates,
- shouldDeferDiags(S, Args, OpLoc))
- << int(E - I);
+ if (I != E) {
+ Sema::DeferDiagsRAII RAII{S, shouldDeferDiags(S, Args, OpLoc)};
+ S.Diag(OpLoc, diag::note_ovl_too_many_candidates) << int(E - I);
+ }
}
static SourceLocation
diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp
index c5ef0d5..b5f91a3 100644
--- a/clang/lib/Sema/SemaRISCV.cpp
+++ b/clang/lib/Sema/SemaRISCV.cpp
@@ -1445,21 +1445,21 @@ void SemaRISCV::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D,
if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Double) &&
!FeatureMap.lookup("zve64d"))
- Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64d";
+ Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zve64d";
// (ELEN, LMUL) pairs of (8, mf8), (16, mf4), (32, mf2), (64, m1) requires at
// least zve64x
else if (((EltSize == 64 && Info.ElementType->isIntegerType()) ||
MinElts == 1) &&
!FeatureMap.lookup("zve64x"))
- Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64x";
+ Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zve64x";
else if (Info.ElementType->isFloat16Type() && !FeatureMap.lookup("zvfh") &&
!FeatureMap.lookup("zvfhmin") &&
!FeatureMap.lookup("xandesvpackfph"))
if (DeclareAndesVectorBuiltins) {
- Diag(Loc, diag::err_riscv_type_requires_extension, D)
+ Diag(Loc, diag::err_riscv_type_requires_extension)
<< Ty << "zvfh, zvfhmin or xandesvpackfph";
} else {
- Diag(Loc, diag::err_riscv_type_requires_extension, D)
+ Diag(Loc, diag::err_riscv_type_requires_extension)
<< Ty << "zvfh or zvfhmin";
}
else if (Info.ElementType->isBFloat16Type() &&
@@ -1467,18 +1467,18 @@ void SemaRISCV::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D,
!FeatureMap.lookup("xandesvbfhcvt") &&
!FeatureMap.lookup("experimental-zvfbfa"))
if (DeclareAndesVectorBuiltins) {
- Diag(Loc, diag::err_riscv_type_requires_extension, D)
+ Diag(Loc, diag::err_riscv_type_requires_extension)
<< Ty << "zvfbfmin or xandesvbfhcvt";
} else {
- Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zvfbfmin";
+ Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zvfbfmin";
}
else if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Float) &&
!FeatureMap.lookup("zve32f"))
- Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32f";
+ Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zve32f";
// Given that caller already checked isRVVType() before calling this function,
// if we don't have at least zve32x supported, then we need to emit error.
else if (!FeatureMap.lookup("zve32x"))
- Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32x";
+ Diag(Loc, diag::err_riscv_type_requires_extension) << Ty << "zve32x";
}
/// Are the two types RVV-bitcast-compatible types? I.e. is bitcasting from the
diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c
index cf20226..6579988 100644
--- a/clang/test/CIR/CodeGen/atomic.c
+++ b/clang/test/CIR/CodeGen/atomic.c
@@ -584,3 +584,526 @@ void clear_volatile(volatile void *p) {
// OGCG: store atomic volatile i8 0, ptr %{{.+}} seq_cst, align 1
}
+
+int atomic_fetch_add(int *ptr, int value) {
+ // CIR-LABEL: @atomic_fetch_add
+ // LLVM-LABEL: @atomic_fetch_add
+ // OGCG-LABEL: @atomic_fetch_add
+
+ return __atomic_fetch_add(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch add seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw add ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw add ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_add_fetch(int *ptr, int value) {
+ // CIR-LABEL: @atomic_add_fetch
+ // LLVM-LABEL: @atomic_add_fetch
+ // OGCG-LABEL: @atomic_add_fetch
+
+ return __atomic_add_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch add seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[OLD:.+]] = atomicrmw add ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[RES:.+]] = add i32 %[[OLD]], %[[VAL]]
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw add ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[RES:.+]] = add i32 %[[OLD]], %[[VAL]]
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int c11_atomic_fetch_add(_Atomic(int) *ptr, int value) {
+ // CIR-LABEL: @c11_atomic_fetch_add
+ // LLVM-LABEL: @c11_atomic_fetch_add
+ // OGCG-LABEL: @c11_atomic_fetch_add
+
+ return __c11_atomic_fetch_add(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch add seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw add ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw add ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_fetch_sub(int *ptr, int value) {
+ // CIR-LABEL: @atomic_fetch_sub
+ // LLVM-LABEL: @atomic_fetch_sub
+ // OGCG-LABEL: @atomic_fetch_sub
+
+ return __atomic_fetch_sub(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch sub seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw sub ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw sub ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_sub_fetch(int *ptr, int value) {
+ // CIR-LABEL: @atomic_sub_fetch
+ // LLVM-LABEL: @atomic_sub_fetch
+ // OGCG-LABEL: @atomic_sub_fetch
+
+ return __atomic_sub_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch sub seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[OLD:.+]] = atomicrmw sub ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[RES:.+]] = sub i32 %[[OLD]], %[[VAL]]
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw sub ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[RES:.+]] = sub i32 %[[OLD]], %[[VAL]]
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int c11_atomic_fetch_sub(_Atomic(int) *ptr, int value) {
+ // CIR-LABEL: @c11_atomic_fetch_sub
+ // LLVM-LABEL: @c11_atomic_fetch_sub
+ // OGCG-LABEL: @c11_atomic_fetch_sub
+
+ return __c11_atomic_fetch_sub(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch sub seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw sub ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw sub ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+float atomic_fetch_add_fp(float *ptr, float value) {
+ // CIR-LABEL: @atomic_fetch_add_fp
+ // LLVM-LABEL: @atomic_fetch_add_fp
+ // OGCG-LABEL: @atomic_fetch_add_fp
+
+ return __atomic_fetch_add(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch add seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+
+ // LLVM: %[[RES:.+]] = atomicrmw fadd ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw fadd ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+}
+
+float atomic_add_fetch_fp(float *ptr, float value) {
+ // CIR-LABEL: @atomic_add_fetch_fp
+ // LLVM-LABEL: @atomic_add_fetch_fp
+ // OGCG-LABEL: @atomic_add_fetch_fp
+
+ return __atomic_add_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch add seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+
+ // LLVM: %[[OLD:.+]] = atomicrmw fadd ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[RES:.+]] = fadd float %[[OLD]], %[[VAL]]
+ // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw fadd ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[RES:.+]] = fadd float %[[OLD]], %[[VAL]]
+ // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+}
+
+float c11_atomic_fetch_sub_fp(_Atomic(float) *ptr, float value) {
+ // CIR-LABEL: @c11_atomic_fetch_sub_fp
+ // LLVM-LABEL: @c11_atomic_fetch_sub_fp
+ // OGCG-LABEL: @c11_atomic_fetch_sub_fp
+
+ return __c11_atomic_fetch_sub(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch sub seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+
+ // LLVM: %[[RES:.+]] = atomicrmw fsub ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw fsub ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_fetch_min(int *ptr, int value) {
+ // CIR-LABEL: @atomic_fetch_min
+ // LLVM-LABEL: @atomic_fetch_min
+ // OGCG-LABEL: @atomic_fetch_min
+
+ return __atomic_fetch_min(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch min seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw min ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw min ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_min_fetch(int *ptr, int value) {
+ // CIR-LABEL: @atomic_min_fetch
+ // LLVM-LABEL: @atomic_min_fetch
+ // OGCG-LABEL: @atomic_min_fetch
+
+ return __atomic_min_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch min seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[OLD:.+]] = atomicrmw min ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[OLD_LESS:.+]] = icmp slt i32 %[[OLD]], %[[VAL]]
+ // LLVM-NEXT: %[[RES:.+]] = select i1 %[[OLD_LESS]], i32 %[[OLD]], i32 %[[VAL]]
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw min ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[OLD_LESS:.+]] = icmp slt i32 %[[OLD]], %[[VAL]]
+ // OGCG-NEXT: %[[RES:.+]] = select i1 %[[OLD_LESS]], i32 %[[OLD]], i32 %[[VAL]]
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int c11_atomic_fetch_min(_Atomic(int) *ptr, int value) {
+ // CIR-LABEL: @c11_atomic_fetch_min
+ // LLVM-LABEL: @c11_atomic_fetch_min
+ // OGCG-LABEL: @c11_atomic_fetch_min
+
+ return __c11_atomic_fetch_min(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch min seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw min ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw min ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+float atomic_fetch_min_fp(float *ptr, float value) {
+ // CIR-LABEL: @atomic_fetch_min_fp
+ // LLVM-LABEL: @atomic_fetch_min_fp
+ // OGCG-LABEL: @atomic_fetch_min_fp
+
+ return __atomic_fetch_min(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch min seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+
+ // LLVM: %[[RES:.+]] = atomicrmw fmin ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw fmin ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+}
+
+float atomic_min_fetch_fp(float *ptr, float value) {
+ // CIR-LABEL: @atomic_min_fetch_fp
+ // LLVM-LABEL: @atomic_min_fetch_fp
+ // OGCG-LABEL: @atomic_min_fetch_fp
+
+ return __atomic_min_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch min seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+
+ // LLVM: %[[OLD:.+]] = atomicrmw fmin ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[RES:.+]] = call float @llvm.minnum.f32(float %[[OLD]], float %[[VAL]])
+ // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw fmin ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[RES:.+]] = call float @llvm.minnum.f32(float %[[OLD]], float %[[VAL]])
+ // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+}
+
+float c11_atomic_fetch_min_fp(_Atomic(float) *ptr, float value) {
+ // CIR-LABEL: @c11_atomic_fetch_min_fp
+ // LLVM-LABEL: @c11_atomic_fetch_min_fp
+ // OGCG-LABEL: @c11_atomic_fetch_min_fp
+
+ return __c11_atomic_fetch_min(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch min seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+
+ // LLVM: %[[RES:.+]] = atomicrmw fmin ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw fmin ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_fetch_max(int *ptr, int value) {
+ // CIR-LABEL: @atomic_fetch_max
+ // LLVM-LABEL: @atomic_fetch_max
+ // OGCG-LABEL: @atomic_fetch_max
+
+ return __atomic_fetch_max(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch max seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw max ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw max ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_max_fetch(int *ptr, int value) {
+ // CIR-LABEL: @atomic_max_fetch
+ // LLVM-LABEL: @atomic_max_fetch
+ // OGCG-LABEL: @atomic_max_fetch
+
+ return __atomic_max_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch max seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[OLD:.+]] = atomicrmw max ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[OLD_GREATER:.+]] = icmp sgt i32 %[[OLD]], %[[VAL]]
+ // LLVM-NEXT: %[[RES:.+]] = select i1 %[[OLD_GREATER]], i32 %[[OLD]], i32 %[[VAL]]
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw max ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[OLD_GREATER:.+]] = icmp sgt i32 %[[OLD]], %[[VAL]]
+ // OGCG-NEXT: %[[RES:.+]] = select i1 %[[OLD_GREATER]], i32 %[[OLD]], i32 %[[VAL]]
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int c11_atomic_fetch_max(_Atomic(int) *ptr, int value) {
+ // CIR-LABEL: @c11_atomic_fetch_max
+ // LLVM-LABEL: @c11_atomic_fetch_max
+ // OGCG-LABEL: @c11_atomic_fetch_max
+
+ return __c11_atomic_fetch_max(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch max seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw max ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw max ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+float atomic_fetch_max_fp(float *ptr, float value) {
+ // CIR-LABEL: @atomic_fetch_max_fp
+ // LLVM-LABEL: @atomic_fetch_max_fp
+ // OGCG-LABEL: @atomic_fetch_max_fp
+
+ return __atomic_fetch_max(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch max seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+
+ // LLVM: %[[RES:.+]] = atomicrmw fmax ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw fmax ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+}
+
+float atomic_max_fetch_fp(float *ptr, float value) {
+ // CIR-LABEL: @atomic_max_fetch_fp
+ // LLVM-LABEL: @atomic_max_fetch_fp
+ // OGCG-LABEL: @atomic_max_fetch_fp
+
+ return __atomic_max_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch max seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+
+ // LLVM: %[[OLD:.+]] = atomicrmw fmax ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[RES:.+]] = call float @llvm.maxnum.f32(float %[[OLD]], float %[[VAL]])
+ // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw fmax ptr %{{.+}}, float %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[RES:.+]] = call float @llvm.maxnum.f32(float %[[OLD]], float %[[VAL]])
+ // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+}
+
+float c11_atomic_fetch_max_fp(_Atomic(float) *ptr, float value) {
+ // CIR-LABEL: @c11_atomic_fetch_max_fp
+ // LLVM-LABEL: @c11_atomic_fetch_max_fp
+ // OGCG-LABEL: @c11_atomic_fetch_max_fp
+
+ return __c11_atomic_fetch_max(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch max seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+
+ // LLVM: %[[RES:.+]] = atomicrmw fmax ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw fmax ptr %{{.+}}, float %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store float %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_fetch_and(int *ptr, int value) {
+ // CIR-LABEL: @atomic_fetch_and
+ // LLVM-LABEL: @atomic_fetch_and
+ // OGCG-LABEL: @atomic_fetch_and
+
+ return __atomic_fetch_and(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch and seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw and ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw and ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_and_fetch(int *ptr, int value) {
+ // CIR-LABEL: @atomic_and_fetch
+ // LLVM-LABEL: @atomic_and_fetch
+ // OGCG-LABEL: @atomic_and_fetch
+
+ return __atomic_and_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch and seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[OLD:.+]] = atomicrmw and ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[RES:.+]] = and i32 %[[OLD]], %[[VAL]]
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw and ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[RES:.+]] = and i32 %[[OLD]], %[[VAL]]
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int c11_atomic_fetch_and(_Atomic(int) *ptr, int value) {
+ // CIR-LABEL: @c11_atomic_fetch_and
+ // LLVM-LABEL: @c11_atomic_fetch_and
+ // OGCG-LABEL: @c11_atomic_fetch_and
+
+ return __c11_atomic_fetch_and(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch and seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw and ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw and ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_fetch_or(int *ptr, int value) {
+ // CIR-LABEL: @atomic_fetch_or
+ // LLVM-LABEL: @atomic_fetch_or
+ // OGCG-LABEL: @atomic_fetch_or
+
+ return __atomic_fetch_or(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch or seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw or ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw or ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_or_fetch(int *ptr, int value) {
+ // CIR-LABEL: @atomic_or_fetch
+ // LLVM-LABEL: @atomic_or_fetch
+ // OGCG-LABEL: @atomic_or_fetch
+
+ return __atomic_or_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch or seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[OLD:.+]] = atomicrmw or ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[RES:.+]] = or i32 %[[OLD]], %[[VAL]]
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw or ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[RES:.+]] = or i32 %[[OLD]], %[[VAL]]
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int c11_atomic_fetch_or(_Atomic(int) *ptr, int value) {
+ // CIR-LABEL: @c11_atomic_fetch_or
+ // LLVM-LABEL: @c11_atomic_fetch_or
+ // OGCG-LABEL: @c11_atomic_fetch_or
+
+ return __c11_atomic_fetch_or(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch or seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw or ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw or ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_fetch_xor(int *ptr, int value) {
+ // CIR-LABEL: @atomic_fetch_xor
+ // LLVM-LABEL: @atomic_fetch_xor
+ // OGCG-LABEL: @atomic_fetch_xor
+
+ return __atomic_fetch_xor(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch xor seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw xor ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw xor ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_xor_fetch(int *ptr, int value) {
+ // CIR-LABEL: @atomic_xor_fetch
+ // LLVM-LABEL: @atomic_xor_fetch
+ // OGCG-LABEL: @atomic_xor_fetch
+
+ return __atomic_xor_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch xor seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[OLD:.+]] = atomicrmw xor ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[RES:.+]] = xor i32 %[[OLD]], %[[VAL]]
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw xor ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[RES:.+]] = xor i32 %[[OLD]], %[[VAL]]
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int c11_atomic_fetch_xor(_Atomic(int) *ptr, int value) {
+ // CIR-LABEL: @c11_atomic_fetch_xor
+ // LLVM-LABEL: @c11_atomic_fetch_xor
+ // OGCG-LABEL: @c11_atomic_fetch_xor
+
+ return __c11_atomic_fetch_xor(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch xor seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw xor ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw xor ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_fetch_nand(int *ptr, int value) {
+ // CIR-LABEL: @atomic_fetch_nand
+ // LLVM-LABEL: @atomic_fetch_nand
+ // OGCG-LABEL: @atomic_fetch_nand
+
+ return __atomic_fetch_nand(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch nand seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw nand ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw nand ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int atomic_nand_fetch(int *ptr, int value) {
+ // CIR-LABEL: @atomic_nand_fetch
+ // LLVM-LABEL: @atomic_nand_fetch
+ // OGCG-LABEL: @atomic_nand_fetch
+
+ return __atomic_nand_fetch(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch nand seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[OLD:.+]] = atomicrmw nand ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // LLVM-NEXT: %[[TMP:.+]] = and i32 %[[OLD]], %[[VAL]]
+ // LLVM-NEXT: %[[RES:.+]] = xor i32 %[[TMP]], -1
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[OLD:.+]] = atomicrmw nand ptr %{{.+}}, i32 %[[VAL:.+]] seq_cst, align 4
+ // OGCG-NEXT: %[[TMP:.+]] = and i32 %[[OLD]], %[[VAL]]
+ // OGCG-NEXT: %[[RES:.+]] = xor i32 %[[TMP]], -1
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
+
+int c11_atomic_fetch_nand(_Atomic(int) *ptr, int value) {
+ // CIR-LABEL: @c11_atomic_fetch_nand
+ // LLVM-LABEL: @c11_atomic_fetch_nand
+ // OGCG-LABEL: @c11_atomic_fetch_nand
+
+ return __c11_atomic_fetch_nand(ptr, value, __ATOMIC_SEQ_CST);
+ // CIR: %{{.+}} = cir.atomic.fetch nand seq_cst fetch_first %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+
+ // LLVM: %[[RES:.+]] = atomicrmw nand ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // LLVM-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+
+ // OGCG: %[[RES:.+]] = atomicrmw nand ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
+ // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
+}
diff --git a/clang/test/CIR/CodeGen/builtin_inline.c b/clang/test/CIR/CodeGen/builtin_inline.c
new file mode 100644
index 0000000..83a3ba6
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtin_inline.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -disable-llvm-passes %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -disable-llvm-passes %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+typedef unsigned long size_t;
+
+// Normal inline builtin declaration
+// When a builtin is redefined with extern inline + always_inline attributes,
+// the compiler creates a .inline version to avoid conflicts with the builtin
+
+extern inline __attribute__((always_inline)) __attribute__((gnu_inline))
+void *memcpy(void *a, const void *b, size_t c) {
+ return __builtin_memcpy(a, b, c);
+}
+
+void *test_inline_builtin_memcpy(void *a, const void *b, size_t c) {
+ return memcpy(a, b, c);
+}
+
+// CIR: cir.func internal private{{.*}}@memcpy.inline({{.*}}) -> !cir.ptr<!void> inline(always)
+
+// CIR-LABEL: @test_inline_builtin_memcpy(
+// CIR: cir.call @memcpy.inline(
+// CIR: }
+
+// LLVM: define internal ptr @memcpy.inline(ptr{{.*}}, ptr{{.*}}, i64{{.*}}) #{{[0-9]+}}
+
+// LLVM-LABEL: @test_inline_builtin_memcpy(
+// LLVM: call ptr @memcpy.inline(
+
+// OGCG-LABEL: @test_inline_builtin_memcpy(
+// OGCG: call ptr @memcpy.inline(
+
+// OGCG: define internal ptr @memcpy.inline(ptr{{.*}} %a, ptr{{.*}} %b, i64{{.*}} %c) #{{[0-9]+}}
+
+// Shadowing case
+// When a non-inline function definition shadows an inline builtin declaration,
+// the .inline version should be replaced with the regular function and removed.
+
+extern inline __attribute__((always_inline)) __attribute__((gnu_inline))
+void *memmove(void *a, const void *b, size_t c) {
+ return __builtin_memmove(a, b, c);
+}
+
+void *memmove(void *a, const void *b, size_t c) {
+ char *dst = (char *)a;
+ const char *src = (const char *)b;
+ if (dst < src) {
+ for (size_t i = 0; i < c; i++) {
+ dst[i] = src[i];
+ }
+ } else {
+ for (size_t i = c; i > 0; i--) {
+ dst[i-1] = src[i-1];
+ }
+ }
+ return a;
+}
+
+void *test_shadowed_memmove(void *a, const void *b, size_t c) {
+ return memmove(a, b, c);
+}
+
+// CIR: cir.func{{.*}}@memmove({{.*}}) -> !cir.ptr<!void>{{.*}}{
+// CIR-NOT: @memmove.inline
+
+// CIR-LABEL: @test_shadowed_memmove(
+// CIR: cir.call @memmove(
+// CIR-NOT: @memmove.inline
+// CIR: }
+
+// LLVM: define dso_local ptr @memmove(ptr{{.*}}, ptr{{.*}}, i64{{.*}}) #{{[0-9]+}}
+// LLVM-NOT: @memmove.inline
+
+// LLVM-LABEL: @test_shadowed_memmove(
+// TODO - this deviation from OGCG is expected until we implement the nobuiltin
+// attribute. See CIRGenFunction::emitDirectCallee
+// LLVM: call ptr @memmove(
+// LLVM-NOT: @memmove.inline
+// LLVM: }
+
+// OGCG: define dso_local ptr @memmove(ptr{{.*}} %a, ptr{{.*}} %b, i64{{.*}} %c) #{{[0-9]+}}
+// OGCG-NOT: @memmove.inline
+
+// OGCG-LABEL: @test_shadowed_memmove(
+// OGCG: call void @llvm.memmove.p0.p0.i64(
+// OGCG-NOT: @memmove.inline
+// OGCG: }
diff --git a/clang/test/CIR/CodeGen/dtors.cpp b/clang/test/CIR/CodeGen/dtors.cpp
index f2c80a5..1fe048b7 100644
--- a/clang/test/CIR/CodeGen/dtors.cpp
+++ b/clang/test/CIR/CodeGen/dtors.cpp
@@ -35,7 +35,7 @@ bool make_temp(const B &) { return false; }
bool test_temp_or() { return make_temp(1) || make_temp(2); }
// CIR: cir.func{{.*}} @_Z12test_temp_orv()
-// CIR: %[[SCOPE:.*]] = cir.scope {
+// CIR: cir.scope {
// CIR: %[[REF_TMP0:.*]] = cir.alloca !rec_B, !cir.ptr<!rec_B>, ["ref.tmp0"]
// CIR: %[[ONE:.*]] = cir.const #cir.int<1>
// CIR: cir.call @_ZN1BC2Ei(%[[REF_TMP0]], %[[ONE]])
@@ -51,9 +51,9 @@ bool test_temp_or() { return make_temp(1) || make_temp(2); }
// CIR: cir.call @_ZN1BD2Ev(%[[REF_TMP1]])
// CIR: cir.yield %[[MAKE_TEMP1]] : !cir.bool
// CIR: })
+// CIR: cir.store{{.*}} %[[TERNARY]], %[[RETVAL:.*]]
// CIR: cir.call @_ZN1BD2Ev(%[[REF_TMP0]])
-// CIR: cir.yield %[[TERNARY]] : !cir.bool
-// CIR: } : !cir.bool
+// CIR: }
// LLVM: define{{.*}} i1 @_Z12test_temp_orv(){{.*}} {
// LLVM: %[[REF_TMP0:.*]] = alloca %struct.B
@@ -105,7 +105,7 @@ bool test_temp_or() { return make_temp(1) || make_temp(2); }
bool test_temp_and() { return make_temp(1) && make_temp(2); }
// CIR: cir.func{{.*}} @_Z13test_temp_andv()
-// CIR: %[[SCOPE:.*]] = cir.scope {
+// CIR: cir.scope {
// CIR: %[[REF_TMP0:.*]] = cir.alloca !rec_B, !cir.ptr<!rec_B>, ["ref.tmp0"]
// CIR: %[[ONE:.*]] = cir.const #cir.int<1>
// CIR: cir.call @_ZN1BC2Ei(%[[REF_TMP0]], %[[ONE]])
@@ -121,9 +121,9 @@ bool test_temp_and() { return make_temp(1) && make_temp(2); }
// CIR: %[[FALSE:.*]] = cir.const #false
// CIR: cir.yield %[[FALSE]] : !cir.bool
// CIR: })
+// CIR: cir.store{{.*}} %[[TERNARY]], %[[RETVAL:.*]]
// CIR: cir.call @_ZN1BD2Ev(%[[REF_TMP0]])
-// CIR: cir.yield %[[TERNARY]] : !cir.bool
-// CIR: } : !cir.bool
+// CIR: }
// LLVM: define{{.*}} i1 @_Z13test_temp_andv(){{.*}} {
// LLVM: %[[REF_TMP0:.*]] = alloca %struct.B
diff --git a/clang/test/CIR/CodeGen/lambda.cpp b/clang/test/CIR/CodeGen/lambda.cpp
index 0c32ceb1..91380b9 100644
--- a/clang/test/CIR/CodeGen/lambda.cpp
+++ b/clang/test/CIR/CodeGen/lambda.cpp
@@ -219,14 +219,13 @@ int f() {
// CIR: cir.func dso_local @_Z1fv() -> !s32i {{.*}} {
// CIR: %[[RETVAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
-// CIR: %[[SCOPE_RET:.*]] = cir.scope {
+// CIR: cir.scope {
// CIR: %[[TMP:.*]] = cir.alloca ![[REC_LAM_G2]], !cir.ptr<![[REC_LAM_G2]]>, ["ref.tmp0"]
// CIR: %[[G2:.*]] = cir.call @_Z2g2v() : () -> ![[REC_LAM_G2]]
// CIR: cir.store{{.*}} %[[G2]], %[[TMP]]
// CIR: %[[RESULT:.*]] = cir.call @_ZZ2g2vENK3$_0clEv(%[[TMP]])
-// CIR: cir.yield %[[RESULT]]
+// CIR: cir.store{{.*}} %[[RESULT]], %[[RETVAL]]
// CIR: }
-// CIR: cir.store{{.*}} %[[SCOPE_RET]], %[[RETVAL]]
// CIR: %[[RET:.*]] = cir.load{{.*}} %[[RETVAL]]
// CIR: cir.return %[[RET]]
@@ -255,10 +254,9 @@ int f() {
// LLVM: %[[G2:.*]] = call %[[REC_LAM_G2]] @_Z2g2v()
// LLVM: store %[[REC_LAM_G2]] %[[G2]], ptr %[[TMP]]
// LLVM: %[[RESULT:.*]] = call i32 @"_ZZ2g2vENK3$_0clEv"(ptr %[[TMP]])
+// LLVM: store i32 %[[RESULT]], ptr %[[RETVAL]]
// LLVM: br label %[[RET_BB:.*]]
// LLVM: [[RET_BB]]:
-// LLVM: %[[RETPHI:.*]] = phi i32 [ %[[RESULT]], %[[SCOPE_BB]] ]
-// LLVM: store i32 %[[RETPHI]], ptr %[[RETVAL]]
// LLVM: %[[RET:.*]] = load i32, ptr %[[RETVAL]]
// LLVM: ret i32 %[[RET]]
@@ -333,14 +331,13 @@ struct A {
// CIR: %[[RETVAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
// CIR: cir.store %[[THIS_ARG]], %[[THIS_ADDR]]
// CIR: %[[THIS]] = cir.load deref %[[THIS_ADDR]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
-// CIR: %[[SCOPE_RET:.*]] = cir.scope {
+// CIR: cir.scope {
// CIR: %[[LAM_ADDR:.*]] = cir.alloca ![[REC_LAM_A]], !cir.ptr<![[REC_LAM_A]]>, ["ref.tmp0"]
// CIR: %[[STRUCT_A:.*]] = cir.get_member %[[LAM_ADDR]][0] {name = "this"} : !cir.ptr<![[REC_LAM_A]]> -> !cir.ptr<!rec_A>
// CIR: cir.call @_ZN1AC1ERKS_(%[[STRUCT_A]], %[[THIS]]){{.*}} : (!cir.ptr<!rec_A>, !cir.ptr<!rec_A>){{.*}} -> ()
// CIR: %[[LAM_RET:.*]] = cir.call @_ZZN1A3fooEvENKUlvE_clEv(%[[LAM_ADDR]])
-// CIR: cir.yield %[[LAM_RET]]
+// CIR: cir.store{{.*}} %[[LAM_RET]], %[[RETVAL]]
// CIR: }
-// CIR: cir.store{{.*}} %[[SCOPE_RET]], %[[RETVAL]]
// CIR: %[[RET:.*]] = cir.load{{.*}} %[[RETVAL]]
// CIR: cir.return %[[RET]]
@@ -355,10 +352,9 @@ struct A {
// LLVM: %[[STRUCT_A:.*]] = getelementptr %[[REC_LAM_A]], ptr %[[LAM_ALLOCA]], i32 0, i32 0
// LLVM: call void @_ZN1AC1ERKS_(ptr %[[STRUCT_A]], ptr %[[THIS]])
// LLVM: %[[LAM_RET:.*]] = call i32 @_ZZN1A3fooEvENKUlvE_clEv(ptr %[[LAM_ALLOCA]])
+// LLVM: store i32 %[[LAM_RET]], ptr %[[RETVAL]]
// LLVM: br label %[[RET_BB:.*]]
// LLVM: [[RET_BB]]:
-// LLVM: %[[RETPHI:.*]] = phi i32 [ %[[LAM_RET]], %[[SCOPE_BB]] ]
-// LLVM: store i32 %[[RETPHI]], ptr %[[RETVAL]]
// LLVM: %[[RET:.*]] = load i32, ptr %[[RETVAL]]
// LLVM: ret i32 %[[RET]]
@@ -407,14 +403,13 @@ struct A {
// CIR: %[[RETVAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
// CIR: cir.store %[[THIS_ARG]], %[[THIS_ADDR]]
// CIR: %[[THIS]] = cir.load %[[THIS_ADDR]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
-// CIR: %[[SCOPE_RET:.*]] = cir.scope {
+// CIR: cir.scope {
// CIR: %[[LAM_ADDR:.*]] = cir.alloca ![[REC_LAM_PTR_A]], !cir.ptr<![[REC_LAM_PTR_A]]>, ["ref.tmp0"]
// CIR: %[[A_ADDR_ADDR:.*]] = cir.get_member %[[LAM_ADDR]][0] {name = "this"} : !cir.ptr<![[REC_LAM_PTR_A]]> -> !cir.ptr<!cir.ptr<!rec_A>>
// CIR: cir.store{{.*}} %[[THIS]], %[[A_ADDR_ADDR]]
// CIR: %[[LAM_RET:.*]] = cir.call @_ZZN1A3barEvENKUlvE_clEv(%[[LAM_ADDR]])
-// CIR: cir.yield %[[LAM_RET]]
+// CIR: cir.store{{.*}} %[[LAM_RET]], %[[RETVAL]]
// CIR: }
-// CIR: cir.store{{.*}} %[[SCOPE_RET]], %[[RETVAL]]
// CIR: %[[RET:.*]] = cir.load{{.*}} %[[RETVAL]]
// CIR: cir.return %[[RET]]
@@ -429,10 +424,9 @@ struct A {
// LLVM: %[[A_ADDR_ADDR:.*]] = getelementptr %[[REC_LAM_PTR_A]], ptr %[[LAM_ALLOCA]], i32 0, i32 0
// LLVM: store ptr %[[THIS]], ptr %[[A_ADDR_ADDR]]
// LLVM: %[[LAM_RET:.*]] = call i32 @_ZZN1A3barEvENKUlvE_clEv(ptr %[[LAM_ALLOCA]])
+// LLVM: store i32 %[[LAM_RET]], ptr %[[RETVAL]]
// LLVM: br label %[[RET_BB:.*]]
// LLVM: [[RET_BB]]:
-// LLVM: %[[RETPHI:.*]] = phi i32 [ %[[LAM_RET]], %[[SCOPE_BB]] ]
-// LLVM: store i32 %[[RETPHI]], ptr %[[RETVAL]]
// LLVM: %[[RET:.*]] = load i32, ptr %[[RETVAL]]
// LLVM: ret i32 %[[RET]]
diff --git a/clang/test/CIR/CodeGen/new.cpp b/clang/test/CIR/CodeGen/new.cpp
index 000ea5b..2efad10 100644
--- a/clang/test/CIR/CodeGen/new.cpp
+++ b/clang/test/CIR/CodeGen/new.cpp
@@ -208,6 +208,127 @@ void t_new_constant_size() {
// OGCG: %[[CALL:.*]] = call noalias noundef nonnull ptr @_Znam(i64 noundef 128)
// OGCG: store ptr %[[CALL]], ptr %[[P_ADDR]], align 8
+class C {
+ public:
+ ~C();
+};
+
+void t_constant_size_nontrivial() {
+ auto p = new C[3];
+}
+
+// CHECK: cir.func{{.*}} @_Z26t_constant_size_nontrivialv()
+// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["p", init] {alignment = 8 : i64}
+// CHECK: %[[#NUM_ELEMENTS:]] = cir.const #cir.int<3> : !u64i
+// CHECK: %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<3> : !u64i
+// CHECK: %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<11> : !u64i
+// CHECK: %[[RAW_PTR:.*]] = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK: %[[COOKIE_PTR_BASE:.*]] = cir.cast bitcast %[[RAW_PTR]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!u8i>>
+// CHECK: %[[COOKIE_PTR:.*]] = cir.cast bitcast %[[COOKIE_PTR_BASE]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!u64i>
+// CHECK: cir.store align(8) %[[#NUM_ELEMENTS]], %[[COOKIE_PTR]] : !u64i, !cir.ptr<!u64i>
+// CHECK: %[[#COOKIE_SIZE:]] = cir.const #cir.int<8> : !s32i
+// CHECK: %[[DATA_PTR_RAW:.*]] = cir.ptr_stride %[[COOKIE_PTR_BASE]], %[[#COOKIE_SIZE]] : (!cir.ptr<!cir.ptr<!u8i>>, !s32i) -> !cir.ptr<!cir.ptr<!u8i>>
+// CHECK: %[[DATA_PTR_VOID:.*]] = cir.cast bitcast %[[DATA_PTR_RAW]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!void>
+// CHECK: %[[DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR_VOID]] : !cir.ptr<!void> -> !cir.ptr<!rec_C>
+// CHECK: cir.store align(8) %[[DATA_PTR]], %[[P_ADDR]] : !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>
+// CHECK: cir.return
+// CHECK: }
+
+// LLVM: @_Z26t_constant_size_nontrivialv()
+// LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8
+// LLVM: %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 11)
+// LLVM: store i64 3, ptr %[[COOKIE_PTR]], align 8
+// LLVM: %[[ALLOCATED_PTR:.*]] = getelementptr ptr, ptr %[[COOKIE_PTR]], i64 8
+// LLVM: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8
+
+// OGCG: @_Z26t_constant_size_nontrivialv()
+// OGCG: %[[ALLOCA:.*]] = alloca ptr, align 8
+// OGCG: %[[COOKIE_PTR:.*]] = call noalias noundef nonnull ptr @_Znam(i64 noundef 11)
+// OGCG: store i64 3, ptr %[[COOKIE_PTR]], align 8
+// OGCG: %[[ALLOCATED_PTR:.*]] = getelementptr inbounds i8, ptr %[[COOKIE_PTR]], i64 8
+// OGCG: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8
+
+class D {
+ public:
+ int x;
+ ~D();
+};
+
+void t_constant_size_nontrivial2() {
+ auto p = new D[3];
+}
+
+// In this test SIZE_WITHOUT_COOKIE isn't used, but it would be if there were
+// an initializer.
+
+// CHECK: cir.func{{.*}} @_Z27t_constant_size_nontrivial2v()
+// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr<!rec_D>, !cir.ptr<!cir.ptr<!rec_D>>, ["p", init] {alignment = 8 : i64}
+// CHECK: %[[#NUM_ELEMENTS:]] = cir.const #cir.int<3> : !u64i
+// CHECK: %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<12> : !u64i
+// CHECK: %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<20> : !u64i
+// CHECK: %[[RAW_PTR:.*]] = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK: %[[COOKIE_PTR_BASE:.*]] = cir.cast bitcast %[[RAW_PTR]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!u8i>>
+// CHECK: %[[COOKIE_PTR:.*]] = cir.cast bitcast %[[COOKIE_PTR_BASE]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!u64i>
+// CHECK: cir.store align(8) %[[#NUM_ELEMENTS]], %[[COOKIE_PTR]] : !u64i, !cir.ptr<!u64i>
+// CHECK: %[[#COOKIE_SIZE:]] = cir.const #cir.int<8> : !s32i
+// CHECK: %[[DATA_PTR_RAW:.*]] = cir.ptr_stride %[[COOKIE_PTR_BASE]], %[[#COOKIE_SIZE]] : (!cir.ptr<!cir.ptr<!u8i>>, !s32i) -> !cir.ptr<!cir.ptr<!u8i>>
+// CHECK: %[[DATA_PTR_VOID:.*]] = cir.cast bitcast %[[DATA_PTR_RAW]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!void>
+// CHECK: %[[DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR_VOID]] : !cir.ptr<!void> -> !cir.ptr<!rec_D>
+// CHECK: cir.store align(8) %[[DATA_PTR]], %[[P_ADDR]] : !cir.ptr<!rec_D>, !cir.ptr<!cir.ptr<!rec_D>>
+// CHECK: cir.return
+// CHECK: }
+
+// LLVM: @_Z27t_constant_size_nontrivial2v()
+// LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8
+// LLVM: %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 20)
+// LLVM: store i64 3, ptr %[[COOKIE_PTR]], align 8
+// LLVM: %[[ALLOCATED_PTR:.*]] = getelementptr ptr, ptr %[[COOKIE_PTR]], i64 8
+// LLVM: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8
+
+struct alignas(16) E {
+ int x;
+ ~E();
+};
+
+void t_align16_nontrivial() {
+ auto p = new E[2];
+}
+
+// CHECK: cir.func{{.*}} @_Z20t_align16_nontrivialv()
+// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr<!rec_E>, !cir.ptr<!cir.ptr<!rec_E>>, ["p", init] {alignment = 8 : i64}
+// CHECK: %[[#NUM_ELEMENTS:]] = cir.const #cir.int<2> : !u64i
+// CHECK: %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<32> : !u64i
+// CHECK: %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<48> : !u64i
+// CHECK: %[[RAW_PTR:.*]] = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK: %[[COOKIE_PTR_BASE:.*]] = cir.cast bitcast %[[RAW_PTR]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!u8i>>
+// CHECK: %[[COOKIE_OFFSET:.*]] = cir.const #cir.int<8> : !s32i
+// CHECK: %[[COOKIE_PTR_RAW:.*]] = cir.ptr_stride %[[COOKIE_PTR_BASE]], %[[COOKIE_OFFSET]] : (!cir.ptr<!cir.ptr<!u8i>>, !s32i) -> !cir.ptr<!cir.ptr<!u8i>>
+// CHECK: %[[COOKIE_PTR:.*]] = cir.cast bitcast %[[COOKIE_PTR_RAW]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!u64i>
+// CHECK: cir.store align(8) %[[#NUM_ELEMENTS]], %[[COOKIE_PTR]] : !u64i, !cir.ptr<!u64i>
+// CHECK: %[[#COOKIE_SIZE:]] = cir.const #cir.int<16> : !s32i
+// CHECK: %[[DATA_PTR_RAW:.*]] = cir.ptr_stride %[[COOKIE_PTR_BASE]], %[[#COOKIE_SIZE]] : (!cir.ptr<!cir.ptr<!u8i>>, !s32i) -> !cir.ptr<!cir.ptr<!u8i>>
+// CHECK: %[[DATA_PTR_VOID:.*]] = cir.cast bitcast %[[DATA_PTR_RAW]] : !cir.ptr<!cir.ptr<!u8i>> -> !cir.ptr<!void>
+// CHECK: %[[DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR_VOID]] : !cir.ptr<!void> -> !cir.ptr<!rec_E>
+// CHECK: cir.store align(8) %[[DATA_PTR]], %[[P_ADDR]] : !cir.ptr<!rec_E>, !cir.ptr<!cir.ptr<!rec_E>>
+// CHECK: cir.return
+// CHECK: }
+
+// LLVM: @_Z20t_align16_nontrivialv()
+// LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8
+// LLVM: %[[RAW_PTR:.*]] = call ptr @_Znam(i64 48)
+// LLVM: %[[COOKIE_PTR:.*]] = getelementptr ptr, ptr %[[RAW_PTR]], i64 8
+// LLVM: store i64 2, ptr %[[COOKIE_PTR]], align 8
+// LLVM: %[[ALLOCATED_PTR:.*]] = getelementptr ptr, ptr %[[RAW_PTR]], i64 16
+// LLVM: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8
+
+// OGCG: define{{.*}} void @_Z20t_align16_nontrivialv
+// OGCG: %[[ALLOCA:.*]] = alloca ptr, align 8
+// OGCG: %[[RAW_PTR:.*]] = call noalias noundef nonnull ptr @_Znam(i64 noundef 48)
+// OGCG: %[[COOKIE_PTR:.*]] = getelementptr inbounds i8, ptr %[[RAW_PTR]], i64 8
+// OGCG: store i64 2, ptr %[[COOKIE_PTR]], align 8
+// OGCG: %[[ALLOCATED_PTR:.*]] = getelementptr inbounds i8, ptr %[[RAW_PTR]], i64 16
+// OGCG: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8
+// OGCG: ret void
void t_new_multidim_constant_size() {
auto p = new double[2][3][4];
diff --git a/clang/test/CIR/CodeGen/statement-exprs.c b/clang/test/CIR/CodeGen/statement-exprs.c
index f6ec9ec..c784ec9 100644
--- a/clang/test/CIR/CodeGen/statement-exprs.c
+++ b/clang/test/CIR/CodeGen/statement-exprs.c
@@ -218,7 +218,7 @@ struct S { int x; };
int test3() { return ({ struct S s = {1}; s; }).x; }
// CIR: cir.func no_proto dso_local @test3() -> !s32i
// CIR: %[[RETVAL:.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
-// CIR: %[[YIELDVAL:.+]] = cir.scope {
+// CIR: cir.scope {
// CIR: %[[REF_TMP0:.+]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["ref.tmp0"]
// CIR: %[[TMP:.+]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["tmp"]
// CIR: cir.scope {
@@ -230,9 +230,8 @@ int test3() { return ({ struct S s = {1}; s; }).x; }
// CIR: }
// CIR: %[[GEP_X_TMP:.+]] = cir.get_member %[[REF_TMP0]][0] {name = "x"} : !cir.ptr<!rec_S> -> !cir.ptr<!s32i>
// CIR: %[[XVAL:.+]] = cir.load {{.*}} %[[GEP_X_TMP]] : !cir.ptr<!s32i>, !s32i
-// CIR: cir.yield %[[XVAL]] : !s32i
-// CIR: } : !s32i
-// CIR: cir.store %[[YIELDVAL]], %[[RETVAL]] : !s32i, !cir.ptr<!s32i>
+// CIR: cir.store{{.*}} %[[XVAL]], %[[RETVAL]] : !s32i, !cir.ptr<!s32i>
+// CIR: }
// CIR: %[[RES:.+]] = cir.load %[[RETVAL]] : !cir.ptr<!s32i>, !s32i
// CIR: cir.return %[[RES]] : !s32i
@@ -252,10 +251,9 @@ int test3() { return ({ struct S s = {1}; s; }).x; }
// LLVM: [[LBL8]]:
// LLVM: %[[GEP_VAR1:.+]] = getelementptr %struct.S, ptr %[[VAR1]], i32 0, i32 0
// LLVM: %[[LOAD_X:.+]] = load i32, ptr %[[GEP_VAR1]]
+// LLVM: store i32 %[[LOAD_X]], ptr %[[VAR4]]
// LLVM: br label %[[LBL11:.+]]
// LLVM: [[LBL11]]:
-// LLVM: %[[PHI:.+]] = phi i32 [ %[[LOAD_X]], %[[LBL8]] ]
-// LLVM: store i32 %[[PHI]], ptr %[[VAR4]]
// LLVM: %[[RES:.+]] = load i32, ptr %[[VAR4]]
// LLVM: ret i32 %[[RES]]
diff --git a/clang/test/CIR/CodeGen/struct.cpp b/clang/test/CIR/CodeGen/struct.cpp
index 6d362c7..c8db714 100644
--- a/clang/test/CIR/CodeGen/struct.cpp
+++ b/clang/test/CIR/CodeGen/struct.cpp
@@ -280,3 +280,67 @@ void bin_comma() {
// OGCG: define{{.*}} void @_Z9bin_commav()
// OGCG: %[[A_ADDR:.*]] = alloca %struct.CompleteS, align 4
// OGCG: call void @llvm.memset.p0.i64(ptr align 4 %[[A_ADDR]], i8 0, i64 8, i1 false)
+
+void compound_literal_expr() { CompleteS a = (CompleteS){}; }
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["a", init]
+// CIR: %[[A_ELEM_0_PTR:.*]] = cir.get_member %[[A_ADDR]][0] {name = "a"} : !cir.ptr<!rec_CompleteS> -> !cir.ptr<!s32i>
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s32i
+// CIR: cir.store{{.*}} %[[CONST_0]], %[[A_ELEM_0_PTR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[A_ELEM_1_PTR:.*]] = cir.get_member %[[A_ADDR]][1] {name = "b"} : !cir.ptr<!rec_CompleteS> -> !cir.ptr<!s8i>
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s8i
+// CIR: cir.store{{.*}} %[[CONST_0]], %[[A_ELEM_1_PTR]] : !s8i, !cir.ptr<!s8i>
+
+// TODO(cir): zero-initialize the padding
+
+// LLVM: %[[A_ADDR:.*]] = alloca %struct.CompleteS, i64 1, align 4
+// LLVM: %[[A_ELEM_0_PTR:.*]] = getelementptr %struct.CompleteS, ptr %[[A_ADDR]], i32 0, i32 0
+// LLVM: store i32 0, ptr %[[A_ELEM_0_PTR]], align 4
+// LLVM: %[[A_ELEM_1_PTR:.*]] = getelementptr %struct.CompleteS, ptr %[[A_ADDR]], i32 0, i32 1
+// LLVM: store i8 0, ptr %[[A_ELEM_1_PTR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca %struct.CompleteS, align 4
+// OGCG: call void @llvm.memset.p0.i64(ptr align 4 %[[A_ADDR]], i8 0, i64 8, i1 false)
+
+struct StructWithConstMember {
+ int a : 1;
+};
+
+void struct_with_const_member_expr() {
+ int a = (StructWithConstMember){}.a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CIR: %[[RESULT:.*]] = cir.scope {
+// CIR: %[[REF_ADDR:.*]] = cir.alloca !rec_StructWithConstMember, !cir.ptr<!rec_StructWithConstMember>, ["ref.tmp0"]
+// CIR: %[[ELEM_0_PTR:.*]] = cir.get_member %[[REF_ADDR]][0] {name = "a"} : !cir.ptr<!rec_StructWithConstMember> -> !cir.ptr<!u8i>
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s32i
+// CIR: %[[SET_BF:.*]] = cir.set_bitfield{{.*}} (#bfi_a, %[[ELEM_0_PTR]] : !cir.ptr<!u8i>, %[[CONST_0]] : !s32i) -> !s32i
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s32i
+// CIR: cir.yield %[[CONST_0]] : !s32i
+// CIR: } : !s32i
+// CIR: cir.store{{.*}} %[[RESULT]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// TODO(cir): zero-initialize the padding
+
+// LLVM: %[[REF_ADDR:.*]] = alloca %struct.StructWithConstMember, i64 1, align 4
+// LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: br label %[[BF_LABEL:.*]]
+// LLVM: [[BF_LABEL]]:
+// LLVM: %[[ELEM_0_PTR:.*]] = getelementptr %struct.StructWithConstMember, ptr %[[REF_ADDR]], i32 0, i32 0
+// LLVM: %[[TMP_REF:.*]] = load i8, ptr %[[ELEM_0_PTR]], align 4
+// LLVM: %[[BF_CLEAR:.*]] = and i8 %[[TMP_REF]], -2
+// LLVM: %[[BF_SET:.*]] = or i8 %[[BF_CLEAR]], 0
+// LLVM: store i8 %[[BF_SET]], ptr %[[ELEM_0_PTR]], align 4
+// LLVM: br label %[[RESULT_LABEL:.*]]
+// LLVM: [[RESULT_LABEL]]:
+// LLVM: %[[RESULT:.*]] = phi i32 [ 0, %[[BF_LABEL]] ]
+// LLVM: store i32 %[[RESULT]], ptr %[[A_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[REF_ADDR:.*]] = alloca %struct.StructWithConstMember, align 4
+// OGCG: %[[TMP_REF:.*]] = load i8, ptr %[[REF_ADDR]], align 4
+// OGCG: %[[BF_CLEAR:.*]] = and i8 %[[TMP_REF]], -2
+// OGCG: %[[BF_SET:.*]] = or i8 %[[BF_CLEAR]], 0
+// OGCG: store i8 %[[BF_SET]], ptr %[[REF_ADDR]], align 4
+// OGCG: store i32 0, ptr %[[A_ADDR]], align 4
diff --git a/clang/test/CIR/CodeGen/try-catch.cpp b/clang/test/CIR/CodeGen/try-catch.cpp
index 8f0b3c4..5a50310 100644
--- a/clang/test/CIR/CodeGen/try-catch.cpp
+++ b/clang/test/CIR/CodeGen/try-catch.cpp
@@ -30,3 +30,90 @@ void empty_try_block_with_catch_with_int_exception() {
// OGCG: define{{.*}} void @_Z45empty_try_block_with_catch_with_int_exceptionv()
// OGCG: ret void
+
+void try_catch_with_empty_catch_all() {
+ int a = 1;
+ try {
+ return;
+ ++a;
+ } catch (...) {
+ }
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
+// CIR:   cir.store{{.*}} %[[CONST_1]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: cir.scope {
+// CIR: cir.try {
+// CIR: cir.return
+// CIR: ^bb1: // no predecessors
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CIR: %[[RESULT:.*]] = cir.unary(inc, %[[TMP_A]]) nsw : !s32i, !s32i
+// CIR: cir.store{{.*}} %[[RESULT]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: cir.yield
+// CIR: }
+// CIR: }
+
+// LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: store i32 1, ptr %[[A_ADDR]], align 4
+// LLVM: br label %[[BB_2:.*]]
+// LLVM: [[BB_2]]:
+// LLVM: br label %[[BB_3:.*]]
+// LLVM: [[BB_3]]:
+// LLVM: ret void
+// LLVM: [[BB_4:.*]]:
+// LLVM: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// LLVM: %[[RESULT:.*]] = add nsw i32 %[[TMP_A]], 1
+// LLVM: store i32 %[[RESULT]], ptr %[[A_ADDR]], align 4
+// LLVM: br label %[[BB_7:.*]]
+// LLVM: [[BB_7]]:
+// LLVM: br label %[[BB_8:.*]]
+// LLVM: [[BB_8]]:
+// LLVM: ret void
+
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: store i32 1, ptr %[[A_ADDR]], align 4
+// OGCG: ret void
+
+void try_catch_with_empty_catch_all_2() {
+ int a = 1;
+ try {
+ ++a;
+ return;
+ } catch (...) {
+ }
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
+// CIR: cir.store{{.*}} %[[CONST_1]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: cir.scope {
+// CIR: cir.try {
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CIR: %[[RESULT:.*]] = cir.unary(inc, %[[TMP_A]]) nsw : !s32i, !s32i
+// CIR: cir.store{{.*}} %[[RESULT]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: cir.return
+// CIR: }
+// CIR: }
+
+// LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: store i32 1, ptr %[[A_ADDR]], align 4
+// LLVM: br label %[[BB_2:.*]]
+// LLVM: [[BB_2]]:
+// LLVM: br label %[[BB_3:.*]]
+// LLVM: [[BB_3]]:
+// LLVM: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// LLVM:  %[[RESULT:.*]] = add nsw i32 %[[TMP_A]], 1
+// LLVM: store i32 %[[RESULT]], ptr %[[A_ADDR]], align 4
+// LLVM: ret void
+// LLVM: [[BB_6:.*]]:
+// LLVM: br label %[[BB_7:.*]]
+// LLVM: [[BB_7]]:
+// LLVM: ret void
+
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: store i32 1, ptr %[[A_ADDR]], align 4
+// OGCG: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG: %[[RESULT:.*]] = add nsw i32 %[[TMP_A]], 1
+// OGCG: store i32 %[[RESULT]], ptr %[[A_ADDR]], align 4
+// OGCG: ret void
diff --git a/clang/test/CIR/CodeGen/vla.c b/clang/test/CIR/CodeGen/vla.c
index b22c704..0af4f83 100644
--- a/clang/test/CIR/CodeGen/vla.c
+++ b/clang/test/CIR/CodeGen/vla.c
@@ -282,4 +282,61 @@ void f3(unsigned len) {
// break;
// }
// }
- \ No newline at end of file
+
+int f5(unsigned long len) {
+ int arr[len];
+ return arr[2];
+}
+
+// CIR: cir.func{{.*}} @f5(%[[LEN_ARG:.*]]: !u64i {{.*}}) -> !s32i
+// CIR: %[[LEN_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["len", init]
+// CIR: %[[RET_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CIR: %[[SAVED_STACK:.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"]
+// CIR: cir.store{{.*}} %[[LEN_ARG]], %[[LEN_ADDR]]
+// CIR: %[[LEN:.*]] = cir.load{{.*}} %[[LEN_ADDR]]
+// CIR: %[[STACK_PTR:.*]] = cir.stacksave
+// CIR: cir.store{{.*}} %[[STACK_PTR]], %[[SAVED_STACK]]
+// CIR: %[[ARR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, %[[LEN]] : !u64i, ["arr"]
+// CIR: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
+// CIR: %[[ARR_2:.*]] = cir.ptr_stride %[[ARR]], %[[TWO]]
+// CIR: %[[ARR_VAL:.*]] = cir.load{{.*}} %[[ARR_2]] : !cir.ptr<!s32i>, !s32i
+// CIR: cir.store{{.*}} %[[ARR_VAL]], %[[RET_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[STACK_RESTORE_PTR:.*]] = cir.load{{.*}} %[[SAVED_STACK]]
+// CIR: cir.stackrestore %[[STACK_RESTORE_PTR]]
+// CIR: %[[RET_VAL:.*]] = cir.load{{.*}} %[[RET_ADDR]]
+// CIR: cir.return %[[RET_VAL]] : !s32i
+
+// LLVM: define{{.*}} i32 @f5(i64 %[[LEN_ARG:.*]])
+// LLVM: %[[LEN_ADDR:.*]] = alloca i64
+// LLVM: %[[RET_ADDR:.*]] = alloca i32
+// LLVM: %[[SAVED_STACK:.*]] = alloca ptr
+// LLVM: store i64 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// LLVM: %[[LEN:.*]] = load i64, ptr %[[LEN_ADDR]]
+// LLVM: %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// LLVM: store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// LLVM: %[[ARR:.*]] = alloca i32, i64 %[[LEN]]
+// LLVM: %[[ARR_2:.*]] = getelementptr i32, ptr %[[ARR]], i64 2
+// LLVM: %[[ARR_VAL:.*]] = load i32, ptr %[[ARR_2]]
+// LLVM: store i32 %[[ARR_VAL]], ptr %[[RET_ADDR]]
+// LLVM: %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// LLVM: call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+// LLVM: %[[RET_VAL:.*]] = load i32, ptr %[[RET_ADDR]]
+// LLVM: ret i32 %[[RET_VAL]]
+
+// Note: VLA_EXPR0 below is emitted to capture debug info.
+
+// OGCG: define{{.*}} i32 @f5(i64 {{.*}} %[[LEN_ARG:.*]])
+// OGCG: %[[LEN_ADDR:.*]] = alloca i64
+// OGCG: %[[SAVED_STACK:.*]] = alloca ptr
+// OGCG: %[[VLA_EXPR0:.*]] = alloca i64
+// OGCG: store i64 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// OGCG: %[[LEN:.*]] = load i64, ptr %[[LEN_ADDR]]
+// OGCG: %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// OGCG: store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// OGCG: %[[ARR:.*]] = alloca i32, i64 %[[LEN]]
+// OGCG: store i64 %[[LEN]], ptr %[[VLA_EXPR0]]
+// OGCG: %[[ARR_2:.*]] = getelementptr inbounds i32, ptr %[[ARR]], i64 2
+// OGCG: %[[ARR_VAL:.*]] = load i32, ptr %[[ARR_2]]
+// OGCG: %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// OGCG: call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+// OGCG: ret i32 %[[ARR_VAL]]
diff --git a/clang/test/CIR/IR/invalid-atomic.cir b/clang/test/CIR/IR/invalid-atomic.cir
new file mode 100644
index 0000000..a124e43
--- /dev/null
+++ b/clang/test/CIR/IR/invalid-atomic.cir
@@ -0,0 +1,7 @@
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+cir.func @f1(%arg0: !cir.ptr<!cir.float>, %arg1: !cir.float) {
+ // expected-error @below {{only atomic add, sub, max, and min operation could operate on floating-point values}}
+ %0 = cir.atomic.fetch and seq_cst %arg0, %arg1 : (!cir.ptr<!cir.float>, !cir.float) -> !cir.float
+ cir.return
+}
diff --git a/clang/test/Driver/arm-abi.c b/clang/test/Driver/arm-abi.c
index 139456c..b89b969 100644
--- a/clang/test/Driver/arm-abi.c
+++ b/clang/test/Driver/arm-abi.c
@@ -31,6 +31,8 @@
// FreeBSD / OpenBSD default to aapcs-linux
// RUN: %clang -target arm--freebsd- %s -### -o %t.o 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-AAPCS-LINUX %s
+// RUN: %clang -target arm--fuchsia- %s -### -o %t.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHECK-AAPCS-LINUX %s
// RUN: %clang -target arm--openbsd- %s -### -o %t.o 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-AAPCS-LINUX %s
// RUN: %clang -target arm--haiku- %s -### -o %t.o 2>&1 \
diff --git a/clang/test/Driver/fuchsia.c b/clang/test/Driver/fuchsia.c
index cf92f85..3fb2a94 100644
--- a/clang/test/Driver/fuchsia.c
+++ b/clang/test/Driver/fuchsia.c
@@ -2,6 +2,10 @@
// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
// RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \
// RUN: | FileCheck -check-prefixes=CHECK,CHECK-X86_64 %s
+// RUN: %clang -### %s --target=arm-unknown-fuchsia \
+// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
+// RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,CHECK-ARMV8A %s
// RUN: %clang -### %s --target=aarch64-unknown-fuchsia \
// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
// RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \
@@ -14,6 +18,10 @@
// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
// RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \
// RUN: | FileCheck -check-prefixes=CHECK,CHECK-X86_64 %s
+// RUN: %clang -### %s --target=arm-fuchsia \
+// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
+// RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,CHECK-ARMV8A %s
// RUN: %clang -### %s --target=aarch64-fuchsia \
// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
// RUN: --sysroot=%S/platform -fuse-ld=ld 2>&1 \
@@ -24,6 +32,7 @@
// RUN: | FileCheck -check-prefixes=CHECK,CHECK-RISCV64 %s
// CHECK: "-cc1"
// CHECK-X86_64: "-triple" "x86_64-unknown-fuchsia"
+// CHECK-ARMV8A: "-triple" "thumbv8a-unknown-fuchsia"
// CHECK-AARCH64: "-triple" "aarch64-unknown-fuchsia"
// CHECK-RISCV64: "-triple" "riscv64-unknown-fuchsia"
// CHECK: "-funwind-tables=2"
diff --git a/clang/test/Preprocessor/riscv-atomics.c b/clang/test/Preprocessor/riscv-atomics.c
new file mode 100644
index 0000000..6e02173
--- /dev/null
+++ b/clang/test/Preprocessor/riscv-atomics.c
@@ -0,0 +1,24 @@
+// RUN: %clang --target=riscv32-unknown-linux-gnu -march=rv32ia -x c -E -dM %s \
+// RUN: -o - | FileCheck %s
+// RUN: %clang --target=riscv32-unknown-linux-gnu -march=rv32i_zalrsc -x c -E \
+// RUN: -dM %s -o - | FileCheck %s
+// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64ia -x c -E -dM %s \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64i_zalrsc -x c -E \
+// RUN: -dM %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+
+// CHECK: #define __GCC_ATOMIC_BOOL_LOCK_FREE 2
+// CHECK: #define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
+// CHECK: #define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
+// CHECK: #define __GCC_ATOMIC_CHAR_LOCK_FREE 2
+// CHECK: #define __GCC_ATOMIC_INT_LOCK_FREE 2
+// CHECK-RV64: #define __GCC_ATOMIC_LLONG_LOCK_FREE 2
+// CHECK: #define __GCC_ATOMIC_LONG_LOCK_FREE 2
+// CHECK: #define __GCC_ATOMIC_POINTER_LOCK_FREE 2
+// CHECK: #define __GCC_ATOMIC_SHORT_LOCK_FREE 2
+// CHECK: #define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
+// CHECK: #define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
+// CHECK: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+// CHECK: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
+// CHECK: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
+// CHECK-RV64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp
index 4827cc1..cae9265 100644
--- a/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp
@@ -2453,6 +2453,167 @@ TEST_P(UncheckedStatusOrAccessModelTest, SubclassOperator) {
)cc");
}
+TEST_P(UncheckedStatusOrAccessModelTest, UnwrapValueWithStatusCheck) {
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor) {
+ if (sor.status().ok())
+ sor.value();
+ else
+ sor.value(); // [[unsafe]]
+ }
+ )cc");
+}
+
+TEST_P(UncheckedStatusOrAccessModelTest, UnwrapValueWithStatusRefCheck) {
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor) {
+ const STATUS& s = sor.status();
+ if (s.ok())
+ sor.value();
+ else
+ sor.value(); // [[unsafe]]
+ }
+ )cc");
+}
+
+TEST_P(UncheckedStatusOrAccessModelTest, UnwrapValueWithStatusPtrCheck) {
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor) {
+ const STATUS* s = &sor.status();
+ if (s->ok())
+ sor.value();
+ else
+ sor.value(); // [[unsafe]]
+ }
+ )cc");
+}
+
+TEST_P(UncheckedStatusOrAccessModelTest, UnwrapValueWithMovedStatus) {
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor) {
+ if (std::move(sor.status()).ok())
+ sor.value();
+ else
+ sor.value(); // [[unsafe]]
+ }
+ )cc");
+}
+
+TEST_P(UncheckedStatusOrAccessModelTest, MembersUsedInsideStatus) {
+ ExpectDiagnosticsFor(R"cc(
+ namespace absl {
+
+ class Status {
+ public:
+ bool ok() const;
+
+ void target() const { ok(); }
+ };
+
+ } // namespace absl
+ )cc");
+}
+
+TEST_P(UncheckedStatusOrAccessModelTest, StatusUpdate) {
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor) {
+ STATUS s;
+ s.Update(sor.status());
+ if (s.ok())
+ sor.value();
+ else
+ sor.value(); // [[unsafe]]
+ }
+ )cc");
+
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor1, STATUSOR_INT sor2) {
+ STATUS s;
+ s.Update(sor1.status());
+ s.Update(sor2.status());
+ if (s.ok()) {
+ sor1.value();
+ sor2.value();
+ }
+ }
+ )cc");
+
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor1, STATUSOR_INT sor2) {
+ STATUS s;
+ s.Update(sor1.status());
+ CHECK(s.ok());
+ s.Update(sor2.status());
+ sor1.value();
+ sor2.value(); // [[unsafe]]
+ }
+ )cc");
+
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor1, STATUSOR_INT sor2) {
+ STATUS s;
+ s.Update(sor1.status());
+ CHECK(s.ok());
+ sor1.value();
+ sor2.value(); // [[unsafe]]
+ }
+ )cc");
+
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor1, STATUSOR_INT sor2) {
+ STATUS s;
+ STATUS sor1_status = sor1.status();
+ s.Update(std::move(sor1_status));
+ CHECK(s.ok());
+ sor1.value();
+ sor2.value(); // [[unsafe]]
+ }
+ )cc");
+
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ void target(STATUSOR_INT sor1, STATUSOR_INT sor2) {
+ STATUS s;
+ STATUS sor1_status = sor1.status();
+ sor1_status.Update(sor2.status());
+ s.Update(std::move(sor1_status));
+ CHECK(s.ok());
+ sor1.value();
+ sor2.value();
+ }
+ )cc");
+ ExpectDiagnosticsFor(R"cc(
+#include "unchecked_statusor_access_test_defs.h"
+
+ const STATUS& OptStatus();
+
+ void target(STATUSOR_INT sor) {
+ auto s = sor.status();
+ s.Update(OptStatus());
+ if (s.ok()) sor.value();
+ }
+ )cc");
+}
+
} // namespace
std::string
diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp
index fc80bf4..6b433bb 100644
--- a/clang/unittests/Format/FormatTestComments.cpp
+++ b/clang/unittests/Format/FormatTestComments.cpp
@@ -839,6 +839,25 @@ TEST_F(FormatTestComments, MultiLineCommentsInDefines) {
getLLVMStyleWithColumns(17)));
}
+TEST_F(FormatTestComments, LineCommentsInMacrosDoNotGetEscapedNewlines) {
+ FormatStyle Style = getLLVMStyleWithColumns(0);
+ Style.ReflowComments = FormatStyle::RCS_Never;
+ verifyFormat("#define FOO (1U) // comment\n"
+ " // comment",
+ Style);
+
+ Style.ColumnLimit = 32;
+ verifyFormat("#define SOME_MACRO(x) x\n"
+ "#define FOO \\\n"
+ " SOME_MACRO(1) + \\\n"
+ " SOME_MACRO(2) // comment\n"
+ " // comment",
+ "#define SOME_MACRO(x) x\n"
+ "#define FOO SOME_MACRO(1) + SOME_MACRO(2) // comment\n"
+ " // comment",
+ Style);
+}
+
TEST_F(FormatTestComments, ParsesCommentsAdjacentToPPDirectives) {
EXPECT_EQ("namespace {}\n// Test\n#define A",
format("namespace {}\n // Test\n#define A"));
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index b86bb1b..eaff813 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -117,14 +117,22 @@ include(CompilerRTDarwinUtils)
if(APPLE)
find_darwin_sdk_dir(DARWIN_osx_SYSROOT macosx)
- find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator)
- find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos)
- find_darwin_sdk_dir(DARWIN_watchossim_SYSROOT watchsimulator)
- find_darwin_sdk_dir(DARWIN_watchos_SYSROOT watchos)
- find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator)
- find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos)
- find_darwin_sdk_dir(DARWIN_xrossim_SYSROOT xrsimulator)
- find_darwin_sdk_dir(DARWIN_xros_SYSROOT xros)
+ if(COMPILER_RT_ENABLE_IOS)
+ find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator)
+ find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos)
+ endif()
+ if(COMPILER_RT_ENABLE_WATCHOS)
+ find_darwin_sdk_dir(DARWIN_watchossim_SYSROOT watchsimulator)
+ find_darwin_sdk_dir(DARWIN_watchos_SYSROOT watchos)
+ endif()
+ if(COMPILER_RT_ENABLE_TVOS)
+ find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator)
+ find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos)
+ endif()
+ if(COMPILER_RT_ENABLE_XROS)
+ find_darwin_sdk_dir(DARWIN_xrossim_SYSROOT xrsimulator)
+ find_darwin_sdk_dir(DARWIN_xros_SYSROOT xros)
+ endif()
# Get supported architecture from SDKSettings.
function(sdk_has_arch_support sdk_path os arch has_support)
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 67db438..8dfbdec 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -408,12 +408,18 @@ if(APPLE)
include(CompilerRTDarwinUtils)
find_darwin_sdk_dir(DARWIN_osx_SYSROOT macosx)
- find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator)
- find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos)
- find_darwin_sdk_dir(DARWIN_watchossim_SYSROOT watchsimulator)
- find_darwin_sdk_dir(DARWIN_watchos_SYSROOT watchos)
- find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator)
- find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos)
+ if(COMPILER_RT_ENABLE_IOS)
+ find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator)
+ find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos)
+ endif()
+ if(COMPILER_RT_ENABLE_WATCHOS)
+ find_darwin_sdk_dir(DARWIN_watchossim_SYSROOT watchsimulator)
+ find_darwin_sdk_dir(DARWIN_watchos_SYSROOT watchos)
+ endif()
+ if(COMPILER_RT_ENABLE_TVOS)
+ find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator)
+ find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos)
+ endif()
if(NOT DARWIN_osx_SYSROOT)
message(WARNING "Could not determine OS X sysroot, trying /usr/include")
diff --git a/compiler-rt/lib/asan/asan_rtl_x86_64.S b/compiler-rt/lib/asan/asan_rtl_x86_64.S
index 9c52898..5ee830d 100644
--- a/compiler-rt/lib/asan/asan_rtl_x86_64.S
+++ b/compiler-rt/lib/asan/asan_rtl_x86_64.S
@@ -5,6 +5,7 @@
#include "sanitizer_common/sanitizer_platform.h"
.file "asan_rtl_x86_64.S"
+.att_syntax
#define NAME(n, reg, op, s, i) n##_##op##_##i##_##s##_##reg
diff --git a/compiler-rt/lib/builtins/assembly.h b/compiler-rt/lib/builtins/assembly.h
index d1e5328..79a45d91 100644
--- a/compiler-rt/lib/builtins/assembly.h
+++ b/compiler-rt/lib/builtins/assembly.h
@@ -337,4 +337,8 @@
#endif
#endif
+#if defined(__i386__) || defined(__amd64__)
+.att_syntax
+#endif
+
#endif // COMPILERRT_ASSEMBLY_H
diff --git a/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S b/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S
index 9804e8d..a5379d3 100644
--- a/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S
+++ b/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S
@@ -30,6 +30,7 @@
.section .text
.file "hwasan_setjmp_x86_64.S"
+.att_syntax
.global ASM_WRAPPER_NAME(setjmp)
ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(setjmp))
diff --git a/compiler-rt/lib/orc/elfnix_tls.x86-64.S b/compiler-rt/lib/orc/elfnix_tls.x86-64.S
index b3e0bef..da20212 100644
--- a/compiler-rt/lib/orc/elfnix_tls.x86-64.S
+++ b/compiler-rt/lib/orc/elfnix_tls.x86-64.S
@@ -13,6 +13,7 @@
// The content of this file is x86_64-only
#if defined(__x86_64__)
+.att_syntax
#define REGISTER_SAVE_SPACE_SIZE 512
diff --git a/compiler-rt/lib/orc/sysv_reenter.x86-64.S b/compiler-rt/lib/orc/sysv_reenter.x86-64.S
index 0a36280..99615c0 100644
--- a/compiler-rt/lib/orc/sysv_reenter.x86-64.S
+++ b/compiler-rt/lib/orc/sysv_reenter.x86-64.S
@@ -12,6 +12,7 @@
// The content of this file is x86_64-only
#if defined(__x86_64__)
+.att_syntax
// Save all GRPS except %rsp.
// This value is also subtracted from %rsp below, despite the fact that %rbp
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
index c633014..5ef090c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
@@ -2,6 +2,8 @@
#include "sanitizer_common/sanitizer_asm.h"
+.att_syntax
+
.comm _ZN14__interception10real_vforkE,4,4
.globl ASM_WRAPPER_NAME(vfork)
ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
index 5500f81..9c85407 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
@@ -2,6 +2,8 @@
#include "sanitizer_common/sanitizer_asm.h"
+.att_syntax
+
.comm _ZN14__interception10real_vforkE,8,8
.globl ASM_WRAPPER_NAME(vfork)
ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S
index f848be9..8b9b706 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S
@@ -3,6 +3,8 @@
#include "sanitizer_common/sanitizer_asm.h"
+.att_syntax
+
#if !defined(__APPLE__)
.section .text
#else
diff --git a/flang-rt/lib/cuda/kernel.cpp b/flang-rt/lib/cuda/kernel.cpp
index c52d039..296f4b7 100644
--- a/flang-rt/lib/cuda/kernel.cpp
+++ b/flang-rt/lib/cuda/kernel.cpp
@@ -23,9 +23,9 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY,
gridDim.y = gridY;
gridDim.z = gridZ;
dim3 blockDim;
- blockDim.x = blockX > 1024 ? 1024 : blockX;
- blockDim.y = blockY > 1024 ? 1024 : blockY;
- blockDim.z = blockZ > 64 ? 64 : blockZ;
+ blockDim.x = blockX;
+ blockDim.y = blockY;
+ blockDim.z = blockZ;
unsigned nbNegGridDim{0};
if (gridX < 0) {
++nbNegGridDim;
@@ -76,8 +76,8 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY,
terminator.Crash("Too many invalid grid dimensions");
}
cudaStream_t defaultStream = 0;
- CUDA_REPORT_IF_ERROR(cudaLaunchKernel(kernel, gridDim, blockDim, params, smem,
- stream != nullptr ? (cudaStream_t)(*stream) : defaultStream));
+ cudaLaunchKernel(kernel, gridDim, blockDim, params, smem,
+ stream != nullptr ? (cudaStream_t)(*stream) : defaultStream);
}
void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
@@ -88,9 +88,9 @@ void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
config.gridDim.x = gridX;
config.gridDim.y = gridY;
config.gridDim.z = gridZ;
- config.blockDim.x = blockX > 1024 ? 1024 : blockX;
- config.blockDim.y = blockY > 1024 ? 1024 : blockY;
- config.blockDim.z = blockZ > 64 ? 64 : blockZ;
+ config.blockDim.x = blockX;
+ config.blockDim.y = blockY;
+ config.blockDim.z = blockZ;
unsigned nbNegGridDim{0};
if (gridX < 0) {
++nbNegGridDim;
@@ -153,7 +153,7 @@ void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
launchAttr[0].val.clusterDim.z = clusterZ;
config.numAttrs = 1;
config.attrs = launchAttr;
- CUDA_REPORT_IF_ERROR(cudaLaunchKernelExC(&config, kernel, params));
+ cudaLaunchKernelExC(&config, kernel, params);
}
void RTDEF(CUFLaunchCooperativeKernel)(const void *kernel, intptr_t gridX,
@@ -165,9 +165,9 @@ void RTDEF(CUFLaunchCooperativeKernel)(const void *kernel, intptr_t gridX,
gridDim.y = gridY;
gridDim.z = gridZ;
dim3 blockDim;
- blockDim.x = blockX > 1024 ? 1024 : blockX;
- blockDim.y = blockY > 1024 ? 1024 : blockY;
- blockDim.z = blockZ > 64 ? 64 : blockZ;
+ blockDim.x = blockX;
+ blockDim.y = blockY;
+ blockDim.z = blockZ;
unsigned nbNegGridDim{0};
if (gridX < 0) {
++nbNegGridDim;
@@ -218,8 +218,8 @@ void RTDEF(CUFLaunchCooperativeKernel)(const void *kernel, intptr_t gridX,
terminator.Crash("Too many invalid grid dimensions");
}
cudaStream_t defaultStream = 0;
- CUDA_REPORT_IF_ERROR(cudaLaunchCooperativeKernel(kernel, gridDim, blockDim,
- params, smem, stream != nullptr ? (cudaStream_t)*stream : defaultStream));
+ cudaLaunchCooperativeKernel(kernel, gridDim, blockDim, params, smem,
+ stream != nullptr ? (cudaStream_t)*stream : defaultStream);
}
} // extern "C"
diff --git a/flang/docs/FortranLLVMTestSuite.md b/flang/docs/FortranLLVMTestSuite.md
index 17083b4..8d9daa4 100644
--- a/flang/docs/FortranLLVMTestSuite.md
+++ b/flang/docs/FortranLLVMTestSuite.md
@@ -73,5 +73,3 @@ instructions described [above](#running-the-llvm-test-suite-with-fortran).
There are additional configure-time options that can be used with the gfortran
tests. More details about those options and their purpose can be found in
[`Fortran/gfortran/README.md`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/README.md).
-
- These tests are Free Software and are shared under the terms of the GNU General Public License (GPL). For more details, please see the accompanying [`LICENSE`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/LICENSE.txt) file.
diff --git a/flang/include/flang/Lower/DirectivesCommon.h b/flang/include/flang/Lower/DirectivesCommon.h
index 6ed3c1b..2d69067 100644
--- a/flang/include/flang/Lower/DirectivesCommon.h
+++ b/flang/include/flang/Lower/DirectivesCommon.h
@@ -39,7 +39,6 @@
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Value.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include <list>
#include <type_traits>
diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index f7910ad..c03d0a0 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -262,6 +262,7 @@ public:
const Scope &FindScope(parser::CharBlock) const;
Scope &FindScope(parser::CharBlock);
void UpdateScopeIndex(Scope &, parser::CharBlock);
+ void DumpScopeIndex(llvm::raw_ostream &) const;
bool IsInModuleFile(parser::CharBlock) const;
diff --git a/flang/include/flang/Utils/OpenMP.h b/flang/include/flang/Utils/OpenMP.h
index 01a94c9..bad0abb 100644
--- a/flang/include/flang/Utils/OpenMP.h
+++ b/flang/include/flang/Utils/OpenMP.h
@@ -29,8 +29,9 @@ mlir::omp::MapInfoOp createMapInfoOp(mlir::OpBuilder &builder,
mlir::Location loc, mlir::Value baseAddr, mlir::Value varPtrPtr,
llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds,
llvm::ArrayRef<mlir::Value> members, mlir::ArrayAttr membersIndex,
- uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType,
- mlir::Type retTy, bool partialMap = false,
+ mlir::omp::ClauseMapFlags mapType,
+ mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy,
+ bool partialMap = false,
mlir::FlatSymbolRefAttr mapperId = mlir::FlatSymbolRefAttr());
/// For an mlir value that does not have storage, allocate temporary storage
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index b3e8b69..af4f420 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -718,6 +718,84 @@ static void genDataOperandOperations(
}
}
+template <typename GlobalCtorOrDtorOp, typename EntryOp, typename DeclareOp,
+ typename ExitOp>
+static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder,
+ fir::FirOpBuilder &builder,
+ mlir::Location loc, fir::GlobalOp globalOp,
+ mlir::acc::DataClause clause,
+ const std::string &declareGlobalName,
+ bool implicit, std::stringstream &asFortran) {
+ GlobalCtorOrDtorOp declareGlobalOp =
+ GlobalCtorOrDtorOp::create(modBuilder, loc, declareGlobalName);
+ builder.createBlock(&declareGlobalOp.getRegion(),
+ declareGlobalOp.getRegion().end(), {}, {});
+ builder.setInsertionPointToEnd(&declareGlobalOp.getRegion().back());
+
+ fir::AddrOfOp addrOp = fir::AddrOfOp::create(
+ builder, loc, fir::ReferenceType::get(globalOp.getType()),
+ globalOp.getSymbol());
+ addDeclareAttr(builder, addrOp, clause);
+
+ llvm::SmallVector<mlir::Value> bounds;
+ EntryOp entryOp = createDataEntryOp<EntryOp>(
+ builder, loc, addrOp.getResTy(), asFortran, bounds,
+ /*structured=*/false, implicit, clause, addrOp.getResTy().getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ if constexpr (std::is_same_v<DeclareOp, mlir::acc::DeclareEnterOp>)
+ DeclareOp::create(builder, loc,
+ mlir::acc::DeclareTokenType::get(entryOp.getContext()),
+ mlir::ValueRange(entryOp.getAccVar()));
+ else
+ DeclareOp::create(builder, loc, mlir::Value{},
+ mlir::ValueRange(entryOp.getAccVar()));
+ if constexpr (std::is_same_v<GlobalCtorOrDtorOp,
+ mlir::acc::GlobalDestructorOp>) {
+ if constexpr (std::is_same_v<ExitOp, mlir::acc::DeclareLinkOp>) {
+ // No destructor emission for declare link in this path to avoid
+ // complex var/varType/varPtrPtr signatures. The ctor registers the link.
+ } else if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
+ std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>) {
+ ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
+ entryOp.getVar(), entryOp.getVarType(),
+ entryOp.getBounds(), entryOp.getAsyncOperands(),
+ entryOp.getAsyncOperandsDeviceTypeAttr(),
+ entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
+ /*structured=*/false, /*implicit=*/false,
+ builder.getStringAttr(*entryOp.getName()));
+ } else {
+ ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
+ entryOp.getBounds(), entryOp.getAsyncOperands(),
+ entryOp.getAsyncOperandsDeviceTypeAttr(),
+ entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
+ /*structured=*/false, /*implicit=*/false,
+ builder.getStringAttr(*entryOp.getName()));
+ }
+ }
+ mlir::acc::TerminatorOp::create(builder, loc);
+ modBuilder.setInsertionPointAfter(declareGlobalOp);
+}
+
+template <typename EntryOp, typename ExitOp>
+static void
+emitCtorDtorPair(mlir::OpBuilder &modBuilder, fir::FirOpBuilder &builder,
+ mlir::Location operandLocation, fir::GlobalOp globalOp,
+ mlir::acc::DataClause clause, std::stringstream &asFortran,
+ const std::string &ctorName) {
+ createDeclareGlobalOp<mlir::acc::GlobalConstructorOp, EntryOp,
+ mlir::acc::DeclareEnterOp, ExitOp>(
+ modBuilder, builder, operandLocation, globalOp, clause, ctorName,
+ /*implicit=*/false, asFortran);
+
+ std::stringstream dtorName;
+ dtorName << globalOp.getSymName().str() << "_acc_dtor";
+ createDeclareGlobalOp<mlir::acc::GlobalDestructorOp,
+ mlir::acc::GetDevicePtrOp, mlir::acc::DeclareExitOp,
+ ExitOp>(modBuilder, builder, operandLocation, globalOp,
+ clause, dtorName.str(),
+ /*implicit=*/false, asFortran);
+}
+
template <typename EntryOp, typename ExitOp>
static void genDeclareDataOperandOperations(
const Fortran::parser::AccObjectList &objectList,
@@ -733,6 +811,37 @@ static void genDeclareDataOperandOperations(
std::stringstream asFortran;
mlir::Location operandLocation = genOperandLocation(converter, accObject);
Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
+ // Handle COMMON/global symbols via module-level ctor/dtor path.
+ if (symbol.detailsIf<Fortran::semantics::CommonBlockDetails>() ||
+ Fortran::semantics::FindCommonBlockContaining(symbol)) {
+ emitCommonGlobal(
+ converter, builder, accObject, dataClause,
+ [&](mlir::OpBuilder &modBuilder, mlir::Location loc,
+ fir::GlobalOp globalOp, mlir::acc::DataClause clause,
+ std::stringstream &asFortranStr, const std::string &ctorName) {
+ if constexpr (std::is_same_v<EntryOp, mlir::acc::DeclareLinkOp>) {
+ createDeclareGlobalOp<
+ mlir::acc::GlobalConstructorOp, mlir::acc::DeclareLinkOp,
+ mlir::acc::DeclareEnterOp, mlir::acc::DeclareLinkOp>(
+ modBuilder, builder, loc, globalOp, clause, ctorName,
+ /*implicit=*/false, asFortranStr);
+ } else if constexpr (std::is_same_v<EntryOp, mlir::acc::CreateOp> ||
+ std::is_same_v<EntryOp, mlir::acc::CopyinOp> ||
+ std::is_same_v<
+ EntryOp,
+ mlir::acc::DeclareDeviceResidentOp> ||
+ std::is_same_v<ExitOp, mlir::acc::CopyoutOp>) {
+ emitCtorDtorPair<EntryOp, ExitOp>(modBuilder, builder, loc,
+ globalOp, clause, asFortranStr,
+ ctorName);
+ } else {
+ // No module-level ctor/dtor for this clause (e.g., deviceptr,
+ // present). Handled via structured declare region only.
+ return;
+ }
+ });
+ continue;
+ }
Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
[&](auto &&s) { return ea.Analyze(s); }, accObject.u);
fir::factory::AddrAndBoundsInfo info =
@@ -4098,49 +4207,6 @@ static void genACC(Fortran::lower::AbstractConverter &converter,
waitOp.setAsyncAttr(firOpBuilder.getUnitAttr());
}
-template <typename GlobalOp, typename EntryOp, typename DeclareOp,
- typename ExitOp>
-static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder,
- fir::FirOpBuilder &builder,
- mlir::Location loc, fir::GlobalOp globalOp,
- mlir::acc::DataClause clause,
- const std::string &declareGlobalName,
- bool implicit, std::stringstream &asFortran) {
- GlobalOp declareGlobalOp =
- GlobalOp::create(modBuilder, loc, declareGlobalName);
- builder.createBlock(&declareGlobalOp.getRegion(),
- declareGlobalOp.getRegion().end(), {}, {});
- builder.setInsertionPointToEnd(&declareGlobalOp.getRegion().back());
-
- fir::AddrOfOp addrOp = fir::AddrOfOp::create(
- builder, loc, fir::ReferenceType::get(globalOp.getType()),
- globalOp.getSymbol());
- addDeclareAttr(builder, addrOp, clause);
-
- llvm::SmallVector<mlir::Value> bounds;
- EntryOp entryOp = createDataEntryOp<EntryOp>(
- builder, loc, addrOp.getResTy(), asFortran, bounds,
- /*structured=*/false, implicit, clause, addrOp.getResTy().getType(),
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- if constexpr (std::is_same_v<DeclareOp, mlir::acc::DeclareEnterOp>)
- DeclareOp::create(builder, loc,
- mlir::acc::DeclareTokenType::get(entryOp.getContext()),
- mlir::ValueRange(entryOp.getAccVar()));
- else
- DeclareOp::create(builder, loc, mlir::Value{},
- mlir::ValueRange(entryOp.getAccVar()));
- if constexpr (std::is_same_v<GlobalOp, mlir::acc::GlobalDestructorOp>) {
- ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
- entryOp.getBounds(), entryOp.getAsyncOperands(),
- entryOp.getAsyncOperandsDeviceTypeAttr(),
- entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
- /*structured=*/false, /*implicit=*/false,
- builder.getStringAttr(*entryOp.getName()));
- }
- mlir::acc::TerminatorOp::create(builder, loc);
- modBuilder.setInsertionPointAfter(declareGlobalOp);
-}
-
template <typename EntryOp>
static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder,
fir::FirOpBuilder &builder,
@@ -4317,6 +4383,66 @@ genGlobalCtorsWithModifier(Fortran::lower::AbstractConverter &converter,
dataClause);
}
+static fir::GlobalOp
+lookupGlobalBySymbolOrEquivalence(Fortran::lower::AbstractConverter &converter,
+ fir::FirOpBuilder &builder,
+ const Fortran::semantics::Symbol &sym) {
+ const Fortran::semantics::Symbol *commonBlock =
+ Fortran::semantics::FindCommonBlockContaining(sym);
+ std::string globalName = commonBlock ? converter.mangleName(*commonBlock)
+ : converter.mangleName(sym);
+ if (fir::GlobalOp g = builder.getNamedGlobal(globalName)) {
+ return g;
+ }
+ // Not found: if not a COMMON member, try equivalence members
+ if (!commonBlock) {
+ if (const Fortran::semantics::EquivalenceSet *eqSet =
+ Fortran::semantics::FindEquivalenceSet(sym)) {
+ for (const Fortran::semantics::EquivalenceObject &eqObj : *eqSet) {
+ std::string eqName = converter.mangleName(eqObj.symbol);
+ if (fir::GlobalOp g = builder.getNamedGlobal(eqName))
+ return g;
+ }
+ }
+ }
+ return {};
+}
+
+template <typename EmitterFn>
+static void emitCommonGlobal(Fortran::lower::AbstractConverter &converter,
+ fir::FirOpBuilder &builder,
+ const Fortran::parser::AccObject &obj,
+ mlir::acc::DataClause clause,
+ EmitterFn &&emitCtorDtor) {
+ Fortran::semantics::Symbol &sym = getSymbolFromAccObject(obj);
+ if (!(sym.detailsIf<Fortran::semantics::CommonBlockDetails>() ||
+ Fortran::semantics::FindCommonBlockContaining(sym)))
+ return;
+
+ fir::GlobalOp globalOp =
+ lookupGlobalBySymbolOrEquivalence(converter, builder, sym);
+ if (!globalOp)
+ llvm::report_fatal_error("could not retrieve global symbol");
+
+ std::stringstream ctorName;
+ ctorName << globalOp.getSymName().str() << "_acc_ctor";
+ if (builder.getModule().lookupSymbol<mlir::acc::GlobalConstructorOp>(
+ ctorName.str()))
+ return;
+
+ mlir::Location operandLocation = genOperandLocation(converter, obj);
+ addDeclareAttr(builder, globalOp.getOperation(), clause);
+ mlir::OpBuilder modBuilder(builder.getModule().getBodyRegion());
+ modBuilder.setInsertionPointAfter(globalOp);
+ std::stringstream asFortran;
+ asFortran << sym.name().ToString();
+
+ auto savedIP = builder.saveInsertionPoint();
+ emitCtorDtor(modBuilder, operandLocation, globalOp, clause, asFortran,
+ ctorName.str());
+ builder.restoreInsertionPoint(savedIP);
+}
+
static void
genDeclareInFunction(Fortran::lower::AbstractConverter &converter,
Fortran::semantics::SemanticsContext &semanticsContext,
@@ -4342,11 +4468,9 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter,
dataClauseOperands.end());
} else if (const auto *createClause =
std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) {
- const Fortran::parser::AccObjectListWithModifier &listWithModifier =
- createClause->v;
- const auto &accObjectList =
- std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
auto crtDataStart = dataClauseOperands.size();
+ const auto &accObjectList =
+ std::get<Fortran::parser::AccObjectList>(createClause->v.t);
genDeclareDataOperandOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
accObjectList, converter, semanticsContext, stmtCtx,
dataClauseOperands, mlir::acc::DataClause::acc_create,
@@ -4378,11 +4502,9 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter,
} else if (const auto *copyoutClause =
std::get_if<Fortran::parser::AccClause::Copyout>(
&clause.u)) {
- const Fortran::parser::AccObjectListWithModifier &listWithModifier =
- copyoutClause->v;
- const auto &accObjectList =
- std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
auto crtDataStart = dataClauseOperands.size();
+ const auto &accObjectList =
+ std::get<Fortran::parser::AccObjectList>(copyoutClause->v.t);
genDeclareDataOperandOperations<mlir::acc::CreateOp,
mlir::acc::CopyoutOp>(
accObjectList, converter, semanticsContext, stmtCtx,
@@ -4423,6 +4545,11 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter,
}
}
+ // If no structured operands were generated (all objects were COMMON),
+ // do not create a declare region.
+ if (dataClauseOperands.empty())
+ return;
+
mlir::func::FuncOp funcOp = builder.getFunction();
auto ops = funcOp.getOps<mlir::acc::DeclareEnterOp>();
mlir::Value declareToken;
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 85398be..1c163e6 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -1080,9 +1080,8 @@ bool ClauseProcessor::processHasDeviceAddr(
[&](const omp::clause::HasDeviceAddr &clause,
const parser::CharBlock &source) {
mlir::Location location = converter.genLocation(source);
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ mlir::omp::ClauseMapFlags mapTypeBits =
+ mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::implicit;
omp::ObjectList baseObjects;
llvm::transform(clause.v, std::back_inserter(baseObjects),
[&](const omp::Object &object) {
@@ -1217,8 +1216,7 @@ bool ClauseProcessor::processLink(
void ClauseProcessor::processMapObjects(
lower::StatementContext &stmtCtx, mlir::Location clauseLocation,
- const omp::ObjectList &objects,
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits,
+ const omp::ObjectList &objects, mlir::omp::ClauseMapFlags mapTypeBits,
std::map<Object, OmpMapParentAndMemberData> &parentMemberIndices,
llvm::SmallVectorImpl<mlir::Value> &mapVars,
llvm::SmallVectorImpl<const semantics::Symbol *> &mapSyms,
@@ -1310,10 +1308,7 @@ void ClauseProcessor::processMapObjects(
mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp(
firOpBuilder, location, baseOp,
/*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
- /*members=*/{}, /*membersIndex=*/mlir::ArrayAttr{},
- static_cast<
- std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- mapTypeBits),
+ /*members=*/{}, /*membersIndex=*/mlir::ArrayAttr{}, mapTypeBits,
mlir::omp::VariableCaptureKind::ByRef, baseOp.getType(),
/*partialMap=*/false, mapperId);
@@ -1347,8 +1342,7 @@ bool ClauseProcessor::processMap(
objects] = clause.t;
if (attachMod)
TODO(currentLocation, "ATTACH modifier is not implemented yet");
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
+ mlir::omp::ClauseMapFlags mapTypeBits = mlir::omp::ClauseMapFlags::none;
std::string mapperIdName = "__implicit_mapper";
// If the map type is specified, then process it else set the appropriate
// default value
@@ -1364,36 +1358,32 @@ bool ClauseProcessor::processMap(
switch (type) {
case Map::MapType::To:
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+ mapTypeBits |= mlir::omp::ClauseMapFlags::to;
break;
case Map::MapType::From:
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ mapTypeBits |= mlir::omp::ClauseMapFlags::from;
break;
case Map::MapType::Tofrom:
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ mapTypeBits |=
+ mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::from;
break;
case Map::MapType::Storage:
- // alloc and release is the default map_type for the Target Data
- // Ops, i.e. if no bits for map_type is supplied then alloc/release
- // (aka storage in 6.0+) is implicitly assumed based on the target
- // directive. Default value for Target Data and Enter Data is alloc
- // and for Exit Data it is release.
+ mapTypeBits |= mlir::omp::ClauseMapFlags::storage;
break;
}
if (typeMods) {
// TODO: Still requires "self" modifier, an OpenMP 6.0+ feature
if (llvm::is_contained(*typeMods, Map::MapTypeModifier::Always))
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
+ mapTypeBits |= mlir::omp::ClauseMapFlags::always;
if (llvm::is_contained(*typeMods, Map::MapTypeModifier::Present))
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
+ mapTypeBits |= mlir::omp::ClauseMapFlags::present;
if (llvm::is_contained(*typeMods, Map::MapTypeModifier::Close))
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
+ mapTypeBits |= mlir::omp::ClauseMapFlags::close;
if (llvm::is_contained(*typeMods, Map::MapTypeModifier::Delete))
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
+ mapTypeBits |= mlir::omp::ClauseMapFlags::del;
if (llvm::is_contained(*typeMods, Map::MapTypeModifier::OmpxHold))
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
+ mapTypeBits |= mlir::omp::ClauseMapFlags::ompx_hold;
}
if (iterator) {
@@ -1437,12 +1427,12 @@ bool ClauseProcessor::processMotionClauses(lower::StatementContext &stmtCtx,
TODO(clauseLocation, "Iterator modifier is not supported yet");
}
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
+ mlir::omp::ClauseMapFlags mapTypeBits =
std::is_same_v<llvm::remove_cvref_t<decltype(clause)>, omp::clause::To>
- ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO
- : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ ? mlir::omp::ClauseMapFlags::to
+ : mlir::omp::ClauseMapFlags::from;
if (expectation && *expectation == omp::clause::To::Expectation::Present)
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
+ mapTypeBits |= mlir::omp::ClauseMapFlags::present;
processMapObjects(stmtCtx, clauseLocation, objects, mapTypeBits,
parentMemberIndices, result.mapVars, mapSymbols);
};
@@ -1568,8 +1558,8 @@ bool ClauseProcessor::processUseDeviceAddr(
[&](const omp::clause::UseDeviceAddr &clause,
const parser::CharBlock &source) {
mlir::Location location = converter.genLocation(source);
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+ mlir::omp::ClauseMapFlags mapTypeBits =
+ mlir::omp::ClauseMapFlags::return_param;
processMapObjects(stmtCtx, location, clause.v, mapTypeBits,
parentMemberIndices, result.useDeviceAddrVars,
useDeviceSyms);
@@ -1589,8 +1579,8 @@ bool ClauseProcessor::processUseDevicePtr(
[&](const omp::clause::UseDevicePtr &clause,
const parser::CharBlock &source) {
mlir::Location location = converter.genLocation(source);
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+ mlir::omp::ClauseMapFlags mapTypeBits =
+ mlir::omp::ClauseMapFlags::return_param;
processMapObjects(stmtCtx, location, clause.v, mapTypeBits,
parentMemberIndices, result.useDevicePtrVars,
useDeviceSyms);
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 9e352fa..6452e39 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -194,8 +194,7 @@ private:
void processMapObjects(
lower::StatementContext &stmtCtx, mlir::Location clauseLocation,
- const omp::ObjectList &objects,
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits,
+ const omp::ObjectList &objects, mlir::omp::ClauseMapFlags mapTypeBits,
std::map<Object, OmpMapParentAndMemberData> &parentMemberIndices,
llvm::SmallVectorImpl<mlir::Value> &mapVars,
llvm::SmallVectorImpl<const semantics::Symbol *> &mapSyms,
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index 2a4ebf1..d39f9dd 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -16,8 +16,6 @@
#include "flang/Semantics/openmp-modifiers.h"
#include "flang/Semantics/symbol.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
-
#include <list>
#include <optional>
#include <tuple>
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9495ea6..a49961c 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -45,7 +45,6 @@
#include "mlir/Support/StateStack.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
using namespace Fortran::lower::omp;
using namespace Fortran::common::openmp;
@@ -945,8 +944,7 @@ getDefaultmapIfPresent(const DefaultMapsTy &defaultMaps, mlir::Type varType) {
return DefMap::ImplicitBehavior::Default;
}
-static std::pair<llvm::omp::OpenMPOffloadMappingFlags,
- mlir::omp::VariableCaptureKind>
+static std::pair<mlir::omp::ClauseMapFlags, mlir::omp::VariableCaptureKind>
getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder,
lower::AbstractConverter &converter,
const DefaultMapsTy &defaultMaps, mlir::Type varType,
@@ -967,8 +965,7 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder,
return size <= ptrSize && align <= ptrAlign;
};
- llvm::omp::OpenMPOffloadMappingFlags mapFlag =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ mlir::omp::ClauseMapFlags mapFlag = mlir::omp::ClauseMapFlags::implicit;
auto implicitBehaviour = getDefaultmapIfPresent(defaultMaps, varType);
if (implicitBehaviour == DefMap::ImplicitBehavior::Default) {
@@ -986,8 +983,8 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder,
mlir::omp::DeclareTargetCaptureClause::link &&
declareTargetOp.getDeclareTargetDeviceType() !=
mlir::omp::DeclareTargetDeviceType::nohost) {
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ mapFlag |= mlir::omp::ClauseMapFlags::to;
+ mapFlag |= mlir::omp::ClauseMapFlags::from;
}
} else if (fir::isa_trivial(varType) || fir::isa_char(varType)) {
// Scalars behave as if they were "firstprivate".
@@ -996,18 +993,18 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder,
if (isLiteralType(varType)) {
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
} else {
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+ mapFlag |= mlir::omp::ClauseMapFlags::to;
}
} else if (!fir::isa_builtin_cptr_type(varType)) {
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ mapFlag |= mlir::omp::ClauseMapFlags::to;
+ mapFlag |= mlir::omp::ClauseMapFlags::from;
}
return std::make_pair(mapFlag, captureKind);
}
switch (implicitBehaviour) {
case DefMap::ImplicitBehavior::Alloc:
- return std::make_pair(llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE,
+ return std::make_pair(mlir::omp::ClauseMapFlags::storage,
mlir::omp::VariableCaptureKind::ByRef);
break;
case DefMap::ImplicitBehavior::Firstprivate:
@@ -1016,26 +1013,22 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder,
"behaviour");
break;
case DefMap::ImplicitBehavior::From:
- return std::make_pair(mapFlag |=
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM,
+ return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::from,
mlir::omp::VariableCaptureKind::ByRef);
break;
case DefMap::ImplicitBehavior::Present:
- return std::make_pair(mapFlag |=
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT,
+ return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::present,
mlir::omp::VariableCaptureKind::ByRef);
break;
case DefMap::ImplicitBehavior::To:
- return std::make_pair(mapFlag |=
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO,
+ return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::to,
(fir::isa_trivial(varType) || fir::isa_char(varType))
? mlir::omp::VariableCaptureKind::ByCopy
: mlir::omp::VariableCaptureKind::ByRef);
break;
case DefMap::ImplicitBehavior::Tofrom:
- return std::make_pair(mapFlag |=
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM |
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO,
+ return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::from |
+ mlir::omp::ClauseMapFlags::to,
mlir::omp::VariableCaptureKind::ByRef);
break;
case DefMap::ImplicitBehavior::Default:
@@ -1044,9 +1037,8 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder,
break;
}
- return std::make_pair(mapFlag |=
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM |
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO,
+ return std::make_pair(mapFlag |= mlir::omp::ClauseMapFlags::from |
+ mlir::omp::ClauseMapFlags::to,
mlir::omp::VariableCaptureKind::ByRef);
}
@@ -2612,18 +2604,14 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
if (auto refType = mlir::dyn_cast<fir::ReferenceType>(baseOp.getType()))
eleType = refType.getElementType();
- std::pair<llvm::omp::OpenMPOffloadMappingFlags,
- mlir::omp::VariableCaptureKind>
+ std::pair<mlir::omp::ClauseMapFlags, mlir::omp::VariableCaptureKind>
mapFlagAndKind = getImplicitMapTypeAndKind(
firOpBuilder, converter, defaultMaps, eleType, loc, sym);
mlir::Value mapOp = createMapInfoOp(
firOpBuilder, converter.getCurrentLocation(), baseOp,
/*varPtrPtr=*/mlir::Value{}, name.str(), bounds, /*members=*/{},
- /*membersIndex=*/mlir::ArrayAttr{},
- static_cast<
- std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- std::get<0>(mapFlagAndKind)),
+ /*membersIndex=*/mlir::ArrayAttr{}, std::get<0>(mapFlagAndKind),
std::get<1>(mapFlagAndKind), baseOp.getType(),
/*partialMap=*/false, mapperId);
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 37b926e..6487f59 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -273,7 +273,7 @@ mlir::Value createParentSymAndGenIntermediateMaps(
semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx,
omp::ObjectList &objectList, llvm::SmallVectorImpl<int64_t> &indices,
OmpMapParentAndMemberData &parentMemberIndices, llvm::StringRef asFortran,
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits) {
+ mlir::omp::ClauseMapFlags mapTypeBits) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
/// Checks if an omp::Object is an array expression with a subscript, e.g.
@@ -414,11 +414,10 @@ mlir::Value createParentSymAndGenIntermediateMaps(
// be safer to just pass OMP_MAP_NONE as the map type, but we may still
// need some of the other map types the mapped member utilises, so for
// now it's good to keep an eye on this.
- llvm::omp::OpenMPOffloadMappingFlags interimMapType = mapTypeBits;
- interimMapType &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
- interimMapType &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
- interimMapType &=
- ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+ mlir::omp::ClauseMapFlags interimMapType = mapTypeBits;
+ interimMapType &= ~mlir::omp::ClauseMapFlags::to;
+ interimMapType &= ~mlir::omp::ClauseMapFlags::from;
+ interimMapType &= ~mlir::omp::ClauseMapFlags::return_param;
// Create a map for the intermediate member and insert it and it's
// indices into the parentMemberIndices list to track it.
@@ -427,10 +426,7 @@ mlir::Value createParentSymAndGenIntermediateMaps(
/*varPtrPtr=*/mlir::Value{}, asFortran,
/*bounds=*/interimBounds,
/*members=*/{},
- /*membersIndex=*/mlir::ArrayAttr{},
- static_cast<
- std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- interimMapType),
+ /*membersIndex=*/mlir::ArrayAttr{}, interimMapType,
mlir::omp::VariableCaptureKind::ByRef, curValue.getType());
parentMemberIndices.memberPlacementIndices.push_back(interimIndices);
@@ -563,7 +559,8 @@ void insertChildMapInfoIntoParent(
// it allows this to work with enter and exit without causing MLIR
// verification issues. The more appropriate thing may be to take
// the "main" map type clause from the directive being used.
- uint64_t mapType = indices.second.memberMap[0].getMapType();
+ mlir::omp::ClauseMapFlags mapType =
+ indices.second.memberMap[0].getMapType();
llvm::SmallVector<mlir::Value> members;
members.reserve(indices.second.memberMap.size());
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 69499f9..ef1f37a 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -134,7 +134,7 @@ mlir::Value createParentSymAndGenIntermediateMaps(
semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx,
omp::ObjectList &objectList, llvm::SmallVectorImpl<int64_t> &indices,
OmpMapParentAndMemberData &parentMemberIndices, llvm::StringRef asFortran,
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits);
+ mlir::omp::ClauseMapFlags mapTypeBits);
omp::ObjectList gatherObjectsOf(omp::Object derivedTypeMember,
semantics::SemanticsContext &semaCtx);
diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
index 8b99913..817434f 100644
--- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -20,8 +20,6 @@
#include "mlir/IR/Operation.h"
#include "mlir/Pass/Pass.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
-
namespace flangomp {
#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
#include "flang/Optimizer/OpenMP/Passes.h.inc"
@@ -120,12 +118,9 @@ class AutomapToTargetDataPass
builder, memOp.getLoc(), memOp.getMemref().getType(),
memOp.getMemref(),
TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
- builder.getIntegerAttr(
- builder.getIntegerType(64, false),
- static_cast<unsigned>(
- isa<fir::StoreOp>(memOp)
- ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO
- : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE)),
+ builder.getAttr<omp::ClauseMapFlagsAttr>(
+ isa<fir::StoreOp>(memOp) ? omp::ClauseMapFlags::to
+ : omp::ClauseMapFlags::del),
builder.getAttr<omp::VariableCaptureKindAttr>(
omp::VariableCaptureKind::ByCopy),
/*var_ptr_ptr=*/mlir::Value{},
diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index 03ff163..65a23be 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -22,7 +22,6 @@
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
namespace flangomp {
#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS
@@ -568,16 +567,15 @@ private:
if (auto refType = mlir::dyn_cast<fir::ReferenceType>(liveInType))
eleType = refType.getElementType();
- llvm::omp::OpenMPOffloadMappingFlags mapFlag =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ mlir::omp::ClauseMapFlags mapFlag = mlir::omp::ClauseMapFlags::implicit;
mlir::omp::VariableCaptureKind captureKind =
mlir::omp::VariableCaptureKind::ByRef;
if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
} else if (!fir::isa_builtin_cptr_type(eleType)) {
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ mapFlag |= mlir::omp::ClauseMapFlags::to;
+ mapFlag |= mlir::omp::ClauseMapFlags::from;
}
llvm::SmallVector<mlir::Value> boundsOps;
@@ -587,11 +585,8 @@ private:
builder, liveIn.getLoc(), rawAddr,
/*varPtrPtr=*/{}, name.str(), boundsOps,
/*members=*/{},
- /*membersIndex=*/mlir::ArrayAttr{},
- static_cast<
- std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- mapFlag),
- captureKind, rawAddr.getType());
+ /*membersIndex=*/mlir::ArrayAttr{}, mapFlag, captureKind,
+ rawAddr.getType());
}
mlir::omp::TargetOp
diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp
index 9278e17..8a9b383 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp
@@ -719,10 +719,9 @@ FailureOr<omp::TargetOp> splitTargetData(omp::TargetOp targetOp,
SmallVector<Value> outerMapInfos;
// Create new mapinfo ops for the inner target region
for (auto mapInfo : mapInfos) {
- auto originalMapType =
- (llvm::omp::OpenMPOffloadMappingFlags)(mapInfo.getMapType());
+ mlir::omp::ClauseMapFlags originalMapType = mapInfo.getMapType();
auto originalCaptureType = mapInfo.getMapCaptureType();
- llvm::omp::OpenMPOffloadMappingFlags newMapType;
+ mlir::omp::ClauseMapFlags newMapType;
mlir::omp::VariableCaptureKind newCaptureType;
// For bycopy, we keep the same map type and capture type
// For byref, we change the map type to none and keep the capture type
@@ -730,7 +729,7 @@ FailureOr<omp::TargetOp> splitTargetData(omp::TargetOp targetOp,
newMapType = originalMapType;
newCaptureType = originalCaptureType;
} else if (originalCaptureType == mlir::omp::VariableCaptureKind::ByRef) {
- newMapType = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
+ newMapType = mlir::omp::ClauseMapFlags::storage;
newCaptureType = originalCaptureType;
outerMapInfos.push_back(mapInfo);
} else {
@@ -738,11 +737,8 @@ FailureOr<omp::TargetOp> splitTargetData(omp::TargetOp targetOp,
return failure();
}
auto innerMapInfo = cast<omp::MapInfoOp>(rewriter.clone(*mapInfo));
- innerMapInfo.setMapTypeAttr(rewriter.getIntegerAttr(
- rewriter.getIntegerType(64, false),
- static_cast<
- std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- newMapType)));
+ innerMapInfo.setMapTypeAttr(
+ rewriter.getAttr<omp::ClauseMapFlagsAttr>(newMapType));
innerMapInfo.setMapCaptureType(newCaptureType);
innerMapInfos.push_back(innerMapInfo.getResult());
}
@@ -834,11 +830,11 @@ static TempOmpVar allocateTempOmpVar(Location loc, Type ty,
alloc = rewriter.create<fir::AllocaOp>(loc, allocType);
}
// Lambda to create mapinfo ops
- auto getMapInfo = [&](uint64_t mappingFlags, const char *name) {
+ auto getMapInfo = [&](mlir::omp::ClauseMapFlags mappingFlags,
+ const char *name) {
return rewriter.create<omp::MapInfoOp>(
loc, alloc.getType(), alloc, TypeAttr::get(allocType),
- rewriter.getIntegerAttr(rewriter.getIntegerType(64, /*isSigned=*/false),
- mappingFlags),
+ rewriter.getAttr<omp::ClauseMapFlagsAttr>(mappingFlags),
rewriter.getAttr<omp::VariableCaptureKindAttr>(
omp::VariableCaptureKind::ByRef),
/*varPtrPtr=*/Value{},
@@ -849,14 +845,10 @@ static TempOmpVar allocateTempOmpVar(Location loc, Type ty,
/*name=*/rewriter.getStringAttr(name), rewriter.getBoolAttr(false));
};
// Create mapinfo ops.
- uint64_t mapFrom =
- static_cast<std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM);
- uint64_t mapTo =
- static_cast<std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO);
- auto mapInfoFrom = getMapInfo(mapFrom, "__flang_workdistribute_from");
- auto mapInfoTo = getMapInfo(mapTo, "__flang_workdistribute_to");
+ auto mapInfoFrom = getMapInfo(mlir::omp::ClauseMapFlags::from,
+ "__flang_workdistribute_from");
+ auto mapInfoTo =
+ getMapInfo(mlir::omp::ClauseMapFlags::to, "__flang_workdistribute_to");
return TempOmpVar{mapInfoFrom, mapInfoTo};
}
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 2bbd803..566e88b 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -43,7 +43,6 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
@@ -350,7 +349,7 @@ class MapInfoFinalizationPass
/// the descriptor map onto the base address map.
mlir::omp::MapInfoOp genBaseAddrMap(mlir::Value descriptor,
mlir::OperandRange bounds,
- int64_t mapType,
+ mlir::omp::ClauseMapFlags mapType,
fir::FirOpBuilder &builder) {
mlir::Location loc = descriptor.getLoc();
mlir::Value baseAddrAddr = fir::BoxOffsetOp::create(
@@ -368,7 +367,7 @@ class MapInfoFinalizationPass
return mlir::omp::MapInfoOp::create(
builder, loc, baseAddrAddr.getType(), descriptor,
mlir::TypeAttr::get(underlyingVarType),
- builder.getIntegerAttr(builder.getIntegerType(64, false), mapType),
+ builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(mapType),
builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
mlir::omp::VariableCaptureKind::ByRef),
baseAddrAddr, /*members=*/mlir::SmallVector<mlir::Value>{},
@@ -428,22 +427,22 @@ class MapInfoFinalizationPass
/// allowing `to` mappings, and `target update` not allowing both `to` and
/// `from` simultaneously. We currently try to maintain the `implicit` flag
/// where necessary, although it does not seem strictly required.
- unsigned long getDescriptorMapType(unsigned long mapTypeFlag,
- mlir::Operation *target) {
- using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
+ mlir::omp::ClauseMapFlags
+ getDescriptorMapType(mlir::omp::ClauseMapFlags mapTypeFlag,
+ mlir::Operation *target) {
+ using mapFlags = mlir::omp::ClauseMapFlags;
if (llvm::isa_and_nonnull<mlir::omp::TargetExitDataOp,
mlir::omp::TargetUpdateOp>(target))
return mapTypeFlag;
- mapFlags flags = mapFlags::OMP_MAP_TO |
- (mapFlags(mapTypeFlag) &
- (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
+ mapFlags flags =
+ mapFlags::to | (mapTypeFlag & (mapFlags::implicit | mapFlags::always));
// For unified_shared_memory, we additionally add `CLOSE` on the descriptor
// to ensure device-local placement where required by tests relying on USM +
// close semantics.
if (moduleRequiresUSM(target->getParentOfType<mlir::ModuleOp>()))
- flags |= mapFlags::OMP_MAP_CLOSE;
- return llvm::to_underlying(flags);
+ flags |= mapFlags::close;
+ return flags;
}
/// Check if the mapOp is present in the HasDeviceAddr clause on
@@ -493,11 +492,6 @@ class MapInfoFinalizationPass
mlir::Value boxAddr = fir::BoxOffsetOp::create(
builder, loc, op.getVarPtr(), fir::BoxFieldAttr::base_addr);
- uint64_t mapTypeToImplicit = static_cast<
- std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
-
mlir::ArrayAttr newMembersAttr;
llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
@@ -506,8 +500,9 @@ class MapInfoFinalizationPass
mlir::omp::MapInfoOp memberMapInfoOp = mlir::omp::MapInfoOp::create(
builder, op.getLoc(), varPtr.getType(), varPtr,
mlir::TypeAttr::get(boxCharType.getEleTy()),
- builder.getIntegerAttr(builder.getIntegerType(64, /*isSigned=*/false),
- mapTypeToImplicit),
+ builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(
+ mlir::omp::ClauseMapFlags::to |
+ mlir::omp::ClauseMapFlags::implicit),
builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
mlir::omp::VariableCaptureKind::ByRef),
/*varPtrPtr=*/boxAddr,
@@ -568,12 +563,9 @@ class MapInfoFinalizationPass
mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
// Force CLOSE in USM paths so the pointer gets device-local placement
// when required by tests relying on USM + close semantics.
- uint64_t mapTypeVal =
- op.getMapType() |
- llvm::to_underlying(
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
- mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr(
- builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal);
+ mlir::omp::ClauseMapFlagsAttr mapTypeAttr =
+ builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(
+ op.getMapType() | mlir::omp::ClauseMapFlags::close);
mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create(
builder, loc, coord.getType(), coord,
@@ -683,17 +675,16 @@ class MapInfoFinalizationPass
// one place in the code may differ from that address in another place.
// The contents of the descriptor (the base address in particular) will
// remain unchanged though.
- uint64_t mapType = op.getMapType();
+ mlir::omp::ClauseMapFlags mapType = op.getMapType();
if (isHasDeviceAddrFlag) {
- mapType |= llvm::to_underlying(
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS);
+ mapType |= mlir::omp::ClauseMapFlags::always;
}
mlir::omp::MapInfoOp newDescParentMapOp = mlir::omp::MapInfoOp::create(
builder, op->getLoc(), op.getResult().getType(), descriptor,
mlir::TypeAttr::get(fir::unwrapRefType(descriptor.getType())),
- builder.getIntegerAttr(builder.getIntegerType(64, false),
- getDescriptorMapType(mapType, target)),
+ builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(
+ getDescriptorMapType(mapType, target)),
op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{}, newMembers,
newMembersAttr, /*bounds=*/mlir::SmallVector<mlir::Value>{},
/*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(),
@@ -896,11 +887,9 @@ class MapInfoFinalizationPass
builder.create<mlir::omp::MapInfoOp>(
op->getLoc(), op.getResult().getType(), op.getVarPtr(),
op.getVarTypeAttr(),
- builder.getIntegerAttr(
- builder.getIntegerType(64, false),
- llvm::to_underlying(
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS)),
+ builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(
+ mlir::omp::ClauseMapFlags::to |
+ mlir::omp::ClauseMapFlags::always),
op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{},
mlir::SmallVector<mlir::Value>{}, mlir::ArrayAttr{},
/*bounds=*/mlir::SmallVector<mlir::Value>{},
@@ -1240,9 +1229,8 @@ class MapInfoFinalizationPass
// we need to change this check for early return OR live with
// over-mapping.
bool hasImplicitMap =
- (llvm::omp::OpenMPOffloadMappingFlags(op.getMapType()) &
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT) ==
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ (op.getMapType() & mlir::omp::ClauseMapFlags::implicit) ==
+ mlir::omp::ClauseMapFlags::implicit;
if (hasImplicitMap)
return;
diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
index 3032857..0972861 100644
--- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
@@ -35,7 +35,6 @@
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Support/Debug.h"
#include <type_traits>
@@ -70,9 +69,6 @@ class MapsForPrivatizedSymbolsPass
return size <= ptrSize && align <= ptrAlign;
};
- uint64_t mapTypeTo = static_cast<
- std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO);
Operation *definingOp = var.getDefiningOp();
Value varPtr = var;
@@ -122,8 +118,7 @@ class MapsForPrivatizedSymbolsPass
builder, loc, varPtr.getType(), varPtr,
TypeAttr::get(llvm::cast<omp::PointerLikeType>(varPtr.getType())
.getElementType()),
- builder.getIntegerAttr(builder.getIntegerType(64, /*isSigned=*/false),
- mapTypeTo),
+ builder.getAttr<omp::ClauseMapFlagsAttr>(omp::ClauseMapFlags::to),
builder.getAttr<omp::VariableCaptureKindAttr>(captureKind),
/*varPtrPtr=*/Value{},
/*members=*/SmallVector<Value>{},
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 759e3a65d..8d00272 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -454,6 +454,8 @@ struct DeclareOpConversion : public mlir::OpRewritePattern<fir::DeclareOp> {
mlir::LogicalResult
matchAndRewrite(fir::DeclareOp op,
mlir::PatternRewriter &rewriter) const override {
+ if (op.getResult().getUsers().empty())
+ return success();
if (auto addrOfOp = op.getMemref().getDefiningOp<fir::AddrOfOp>()) {
if (auto global = symTab.lookup<fir::GlobalOp>(
addrOfOp.getSymbol().getRootReference().getValue())) {
@@ -963,6 +965,8 @@ public:
}
target.addDynamicallyLegalOp<fir::DeclareOp>([&](fir::DeclareOp op) {
+ if (op.getResult().getUsers().empty())
+ return true;
if (inDeviceContext(op))
return true;
if (auto addrOfOp = op.getMemref().getDefiningOp<fir::AddrOfOp>()) {
diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp
index ad035e6..0dec5652 100644
--- a/flang/lib/Parser/openacc-parsers.cpp
+++ b/flang/lib/Parser/openacc-parsers.cpp
@@ -75,21 +75,21 @@ TYPE_PARSER(
// tile size is one of:
// * (represented as an empty std::optional<ScalarIntExpr>)
// constant-int-expr
-TYPE_PARSER(construct<AccTileExpr>(scalarIntConstantExpr) ||
+TYPE_PARSER(sourced(construct<AccTileExpr>(scalarIntConstantExpr) ||
construct<AccTileExpr>(
- "*" >> construct<std::optional<ScalarIntConstantExpr>>()))
+ "*" >> construct<std::optional<ScalarIntConstantExpr>>())))
TYPE_PARSER(construct<AccTileExprList>(nonemptyList(Parser<AccTileExpr>{})))
// 2.9 (1979-1982) gang-arg is one of :
// [num:]int-expr
// dim:int-expr
// static:size-expr
-TYPE_PARSER(construct<AccGangArg>(construct<AccGangArg::Static>(
- "STATIC: " >> Parser<AccSizeExpr>{})) ||
+TYPE_PARSER(sourced(construct<AccGangArg>(construct<AccGangArg::Static>(
+ "STATIC: " >> Parser<AccSizeExpr>{})) ||
construct<AccGangArg>(
construct<AccGangArg::Dim>("DIM: " >> scalarIntExpr)) ||
construct<AccGangArg>(
- construct<AccGangArg::Num>(maybe("NUM: "_tok) >> scalarIntExpr)))
+ construct<AccGangArg::Num>(maybe("NUM: "_tok) >> scalarIntExpr))))
// 2.9 gang-arg-list
TYPE_PARSER(
@@ -101,7 +101,7 @@ TYPE_PARSER(construct<AccCollapseArg>(
// 2.5.15 Reduction, F'2023 R1131, and CUF reduction-op
// Operator for reduction
-TYPE_PARSER(sourced(construct<ReductionOperator>(
+TYPE_PARSER(construct<ReductionOperator>(
first("+" >> pure(ReductionOperator::Operator::Plus),
"*" >> pure(ReductionOperator::Operator::Multiply),
"MAX" >> pure(ReductionOperator::Operator::Max),
@@ -112,32 +112,32 @@ TYPE_PARSER(sourced(construct<ReductionOperator>(
".AND." >> pure(ReductionOperator::Operator::And),
".OR." >> pure(ReductionOperator::Operator::Or),
".EQV." >> pure(ReductionOperator::Operator::Eqv),
- ".NEQV." >> pure(ReductionOperator::Operator::Neqv)))))
+ ".NEQV." >> pure(ReductionOperator::Operator::Neqv))))
// 2.15.1 Bind clause
-TYPE_PARSER(sourced(construct<AccBindClause>(name)) ||
- sourced(construct<AccBindClause>(scalarDefaultCharExpr)))
+TYPE_PARSER(sourced(construct<AccBindClause>(name) ||
+ construct<AccBindClause>(scalarDefaultCharExpr)))
// 2.5.16 Default clause
-TYPE_PARSER(construct<AccDefaultClause>(
+TYPE_PARSER(sourced(construct<AccDefaultClause>(
first("NONE" >> pure(llvm::acc::DefaultValue::ACC_Default_none),
- "PRESENT" >> pure(llvm::acc::DefaultValue::ACC_Default_present))))
+ "PRESENT" >> pure(llvm::acc::DefaultValue::ACC_Default_present)))))
// SELF clause is either a simple optional condition for compute construct
// or a synonym of the HOST clause for the update directive 2.14.4 holding
// an object list.
-TYPE_PARSER(
+TYPE_PARSER(sourced(
construct<AccSelfClause>(Parser<AccObjectList>{}) / lookAhead(")"_tok) ||
- construct<AccSelfClause>(scalarLogicalExpr / lookAhead(")"_tok)) ||
+ construct<AccSelfClause>(scalarLogicalExpr) / lookAhead(")"_tok) ||
construct<AccSelfClause>(
recovery(fail<std::optional<ScalarLogicalExpr>>(
"logical expression or object list expected"_err_en_US),
- SkipTo<')'>{} >> pure<std::optional<ScalarLogicalExpr>>())))
+ SkipTo<')'>{} >> pure<std::optional<ScalarLogicalExpr>>()))))
// Modifier for copyin, copyout, cache and create
-TYPE_PARSER(construct<AccDataModifier>(
+TYPE_PARSER(sourced(construct<AccDataModifier>(
first("ZERO:" >> pure(AccDataModifier::Modifier::Zero),
- "READONLY:" >> pure(AccDataModifier::Modifier::ReadOnly))))
+ "READONLY:" >> pure(AccDataModifier::Modifier::ReadOnly)))))
// Combined directives
TYPE_PARSER(sourced(construct<AccCombinedDirective>(
@@ -166,14 +166,13 @@ TYPE_PARSER(sourced(construct<AccStandaloneDirective>(
TYPE_PARSER(sourced(construct<AccLoopDirective>(
first("LOOP" >> pure(llvm::acc::Directive::ACCD_loop)))))
-TYPE_PARSER(construct<AccBeginLoopDirective>(
- sourced(Parser<AccLoopDirective>{}), Parser<AccClauseList>{}))
+TYPE_PARSER(sourced(construct<AccBeginLoopDirective>(
+ Parser<AccLoopDirective>{}, Parser<AccClauseList>{})))
TYPE_PARSER(construct<AccEndLoop>("END LOOP"_tok))
TYPE_PARSER(construct<OpenACCLoopConstruct>(
- sourced(Parser<AccBeginLoopDirective>{} / endAccLine),
- maybe(Parser<DoConstruct>{}),
+ Parser<AccBeginLoopDirective>{} / endAccLine, maybe(Parser<DoConstruct>{}),
maybe(startAccLine >> Parser<AccEndLoop>{} / endAccLine)))
// 2.15.1 Routine directive
@@ -186,8 +185,8 @@ TYPE_PARSER(sourced(
parenthesized(Parser<AccObjectListWithModifier>{}))))
// 2.11 Combined constructs
-TYPE_PARSER(construct<AccBeginCombinedDirective>(
- sourced(Parser<AccCombinedDirective>{}), Parser<AccClauseList>{}))
+TYPE_PARSER(sourced(construct<AccBeginCombinedDirective>(
+ Parser<AccCombinedDirective>{}, Parser<AccClauseList>{})))
// 2.12 Atomic constructs
TYPE_PARSER(construct<AccEndAtomic>(startAccLine >> "END ATOMIC"_tok))
@@ -213,10 +212,10 @@ TYPE_PARSER("ATOMIC" >>
statement(assignmentStmt), Parser<AccEndAtomic>{} / endAccLine))
TYPE_PARSER(
- sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicRead>{})) ||
- sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicCapture>{})) ||
- sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicWrite>{})) ||
- sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicUpdate>{})))
+ sourced(construct<OpenACCAtomicConstruct>(Parser<AccAtomicRead>{}) ||
+ construct<OpenACCAtomicConstruct>(Parser<AccAtomicCapture>{}) ||
+ construct<OpenACCAtomicConstruct>(Parser<AccAtomicWrite>{}) ||
+ construct<OpenACCAtomicConstruct>(Parser<AccAtomicUpdate>{})))
// 2.13 Declare constructs
TYPE_PARSER(sourced(construct<AccDeclarativeDirective>(
@@ -250,18 +249,18 @@ TYPE_PARSER(construct<OpenACCBlockConstruct>(
pure(llvm::acc::Directive::ACCD_data))))))
// Standalone constructs
-TYPE_PARSER(construct<OpenACCStandaloneConstruct>(
- sourced(Parser<AccStandaloneDirective>{}), Parser<AccClauseList>{}))
+TYPE_PARSER(sourced(construct<OpenACCStandaloneConstruct>(
+ Parser<AccStandaloneDirective>{}, Parser<AccClauseList>{})))
// Standalone declarative constructs
-TYPE_PARSER(construct<OpenACCStandaloneDeclarativeConstruct>(
- sourced(Parser<AccDeclarativeDirective>{}), Parser<AccClauseList>{}))
+TYPE_PARSER(sourced(construct<OpenACCStandaloneDeclarativeConstruct>(
+ Parser<AccDeclarativeDirective>{}, Parser<AccClauseList>{})))
TYPE_PARSER(startAccLine >>
withMessage("expected OpenACC directive"_err_en_US,
- first(sourced(construct<OpenACCDeclarativeConstruct>(
- Parser<OpenACCStandaloneDeclarativeConstruct>{})),
- sourced(construct<OpenACCDeclarativeConstruct>(
+ sourced(first(construct<OpenACCDeclarativeConstruct>(
+ Parser<OpenACCStandaloneDeclarativeConstruct>{}),
+ construct<OpenACCDeclarativeConstruct>(
Parser<OpenACCRoutineConstruct>{})))))
TYPE_PARSER(sourced(construct<OpenACCEndConstruct>(
@@ -293,9 +292,9 @@ TYPE_PARSER(startAccLine >>
"SERIAL"_tok >> maybe("LOOP"_tok) >>
pure(llvm::acc::Directive::ACCD_serial_loop))))))
-TYPE_PARSER(construct<OpenACCCombinedConstruct>(
- sourced(Parser<AccBeginCombinedDirective>{} / endAccLine),
+TYPE_PARSER(sourced(construct<OpenACCCombinedConstruct>(
+ Parser<AccBeginCombinedDirective>{} / endAccLine,
maybe(Parser<DoConstruct>{}),
- maybe(Parser<AccEndCombinedDirective>{} / endAccLine)))
+ maybe(Parser<AccEndCombinedDirective>{} / endAccLine))))
} // namespace Fortran::parser
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 66e5b2c..df0372b 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -140,17 +140,9 @@ void Prescanner::Statement() {
CHECK(*at_ == '!');
}
std::optional<int> condOffset;
- if (InOpenMPConditionalLine()) {
+ if (InOpenMPConditionalLine()) { // !$
condOffset = 2;
- } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' &&
- directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' &&
- directiveSentinel_[4] == '\0') {
- // CUDA conditional compilation line.
- condOffset = 5;
- } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'a' &&
- directiveSentinel_[2] == 'c' && directiveSentinel_[3] == 'c' &&
- directiveSentinel_[4] == '\0') {
- // OpenACC conditional compilation line.
+ } else if (InOpenACCOrCUDAConditionalLine()) { // !@acc or !@cuf
condOffset = 5;
}
if (condOffset && !preprocessingOnly_) {
@@ -166,7 +158,8 @@ void Prescanner::Statement() {
} else {
// Compiler directive. Emit normalized sentinel, squash following spaces.
// Conditional compilation lines (!$) take this path in -E mode too
- // so that -fopenmp only has to appear on the later compilation.
+ // so that -fopenmp only has to appear on the later compilation
+ // (ditto for !@cuf and !@acc).
EmitChar(tokens, '!');
++at_, ++column_;
for (const char *sp{directiveSentinel_}; *sp != '\0';
@@ -202,7 +195,7 @@ void Prescanner::Statement() {
}
tokens.CloseToken();
SkipSpaces();
- if (InOpenMPConditionalLine() && inFixedForm_ && !tabInCurrentLine_ &&
+ if (InConditionalLine() && inFixedForm_ && !tabInCurrentLine_ &&
column_ == 6 && *at_ != '\n') {
// !$ 0 - turn '0' into a space
// !$ 1 - turn '1' into '&'
@@ -347,7 +340,7 @@ void Prescanner::Statement() {
while (CompilerDirectiveContinuation(tokens, line.sentinel)) {
newlineProvenance = GetCurrentProvenance();
}
- if (preprocessingOnly_ && inFixedForm_ && InOpenMPConditionalLine() &&
+ if (preprocessingOnly_ && inFixedForm_ && InConditionalLine() &&
nextLine_ < limit_) {
// In -E mode, when the line after !$ conditional compilation is a
// regular fixed form continuation line, append a '&' to the line.
@@ -1360,11 +1353,10 @@ const char *Prescanner::FixedFormContinuationLine(bool atNewline) {
features_.IsEnabled(LanguageFeature::OldDebugLines))) &&
nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' &&
nextLine_[4] == ' '};
- if (InCompilerDirective() &&
- !(InOpenMPConditionalLine() && !preprocessingOnly_)) {
+ if (InCompilerDirective() && !(InConditionalLine() && !preprocessingOnly_)) {
// !$ under -E is not continued, but deferred to later compilation
if (IsFixedFormCommentChar(col1) &&
- !(InOpenMPConditionalLine() && preprocessingOnly_)) {
+ !(InConditionalLine() && preprocessingOnly_)) {
int j{1};
for (; j < 5; ++j) {
char ch{directiveSentinel_[j - 1]};
@@ -1443,7 +1435,7 @@ const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
}
p = SkipWhiteSpaceIncludingEmptyMacros(p);
if (InCompilerDirective()) {
- if (InOpenMPConditionalLine()) {
+ if (InConditionalLine()) {
if (preprocessingOnly_) {
// in -E mode, don't treat !$ as a continuation
return nullptr;
diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index fc38adb..5e74817 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -171,7 +171,17 @@ private:
bool InOpenMPConditionalLine() const {
return directiveSentinel_ && directiveSentinel_[0] == '$' &&
!directiveSentinel_[1];
- ;
+ }
+ bool InOpenACCOrCUDAConditionalLine() const {
+ return directiveSentinel_ && directiveSentinel_[0] == '@' &&
+ ((directiveSentinel_[1] == 'a' && directiveSentinel_[2] == 'c' &&
+ directiveSentinel_[3] == 'c') ||
+ (directiveSentinel_[1] == 'c' && directiveSentinel_[2] == 'u' &&
+ directiveSentinel_[3] == 'f')) &&
+ directiveSentinel_[4] == '\0';
+ }
+ bool InConditionalLine() const {
+ return InOpenMPConditionalLine() || InOpenACCOrCUDAConditionalLine();
}
bool InFixedFormSource() const {
return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index 4cb0b74..b3fd6c8 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -19,7 +19,6 @@
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/openmp-directive-sets.h"
#include "flang/Semantics/semantics.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
using OmpClauseSet =
Fortran::common::EnumSet<llvm::omp::Clause, llvm::omp::Clause_enumSize>;
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 33e9ea5..b0c36ec 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -31,15 +31,17 @@
#include <list>
#include <map>
+namespace Fortran::semantics {
+
template <typename T>
-static Fortran::semantics::Scope *GetScope(
- Fortran::semantics::SemanticsContext &context, const T &x) {
- std::optional<Fortran::parser::CharBlock> source{GetLastSource(x)};
- return source ? &context.FindScope(*source) : nullptr;
+static Scope *GetScope(SemanticsContext &context, const T &x) {
+ if (auto source{GetLastSource(x)}) {
+ return &context.FindScope(*source);
+ } else {
+ return nullptr;
+ }
}
-namespace Fortran::semantics {
-
template <typename T> class DirectiveAttributeVisitor {
public:
explicit DirectiveAttributeVisitor(SemanticsContext &context)
@@ -361,7 +363,7 @@ private:
void ResolveAccObject(const parser::AccObject &, Symbol::Flag);
Symbol *ResolveAcc(const parser::Name &, Symbol::Flag, Scope &);
Symbol *ResolveAcc(Symbol &, Symbol::Flag, Scope &);
- Symbol *ResolveName(const parser::Name &, bool parentScope = false);
+ Symbol *ResolveName(const parser::Name &);
Symbol *ResolveFctName(const parser::Name &);
Symbol *ResolveAccCommonBlockName(const parser::Name *);
Symbol *DeclareOrMarkOtherAccessEntity(const parser::Name &, Symbol::Flag);
@@ -1257,31 +1259,22 @@ bool AccAttributeVisitor::Pre(const parser::OpenACCStandaloneConstruct &x) {
return true;
}
-Symbol *AccAttributeVisitor::ResolveName(
- const parser::Name &name, bool parentScope) {
- Symbol *prev{currScope().FindSymbol(name.source)};
- // Check in parent scope if asked for.
- if (!prev && parentScope) {
- prev = currScope().parent().FindSymbol(name.source);
- }
- if (prev != name.symbol) {
- name.symbol = prev;
- }
- return prev;
+Symbol *AccAttributeVisitor::ResolveName(const parser::Name &name) {
+ return name.symbol;
}
Symbol *AccAttributeVisitor::ResolveFctName(const parser::Name &name) {
Symbol *prev{currScope().FindSymbol(name.source)};
- if (!prev || (prev && prev->IsFuncResult())) {
+ if (prev && prev->IsFuncResult()) {
prev = currScope().parent().FindSymbol(name.source);
- if (!prev) {
- prev = &context_.globalScope().MakeSymbol(
- name.source, Attrs{}, ProcEntityDetails{});
- }
}
- if (prev != name.symbol) {
- name.symbol = prev;
+ if (!prev) {
+ prev = &*context_.globalScope()
+ .try_emplace(name.source, ProcEntityDetails{})
+ .first->second;
}
+ CHECK(!name.symbol || name.symbol == prev);
+ name.symbol = prev;
return prev;
}
@@ -1388,9 +1381,8 @@ bool AccAttributeVisitor::Pre(const parser::OpenACCRoutineConstruct &x) {
} else {
PushContext(verbatim.source, llvm::acc::Directive::ACCD_routine);
}
- const auto &optName{std::get<std::optional<parser::Name>>(x.t)};
- if (optName) {
- if (Symbol *sym = ResolveFctName(*optName)) {
+ if (const auto &optName{std::get<std::optional<parser::Name>>(x.t)}) {
+ if (Symbol * sym{ResolveFctName(*optName)}) {
Symbol &ultimate{sym->GetUltimate()};
AddRoutineInfoToSymbol(ultimate, x);
} else {
@@ -1425,7 +1417,7 @@ bool AccAttributeVisitor::Pre(const parser::OpenACCCombinedConstruct &x) {
case llvm::acc::Directive::ACCD_kernels_loop:
case llvm::acc::Directive::ACCD_parallel_loop:
case llvm::acc::Directive::ACCD_serial_loop:
- PushContext(combinedDir.source, combinedDir.v);
+ PushContext(x.source, combinedDir.v);
break;
default:
break;
@@ -1706,26 +1698,27 @@ void AccAttributeVisitor::Post(const parser::AccDefaultClause &x) {
}
}
-// For OpenACC constructs, check all the data-refs within the constructs
-// and adjust the symbol for each Name if necessary
void AccAttributeVisitor::Post(const parser::Name &name) {
- auto *symbol{name.symbol};
- if (symbol && WithinConstruct()) {
- symbol = &symbol->GetUltimate();
- if (!symbol->owner().IsDerivedType() && !symbol->has<ProcEntityDetails>() &&
- !symbol->has<SubprogramDetails>() && !IsObjectWithVisibleDSA(*symbol)) {
+ if (name.symbol && WithinConstruct()) {
+ const Symbol &symbol{name.symbol->GetUltimate()};
+ if (!symbol.owner().IsDerivedType() && !symbol.has<ProcEntityDetails>() &&
+ !symbol.has<SubprogramDetails>() && !IsObjectWithVisibleDSA(symbol)) {
if (Symbol * found{currScope().FindSymbol(name.source)}) {
- if (symbol != found) {
- name.symbol = found; // adjust the symbol within region
+ if (&symbol != found) {
+ // adjust the symbol within the region
+ // TODO: why didn't name resolution set the right name originally?
+ name.symbol = found;
} else if (GetContext().defaultDSA == Symbol::Flag::AccNone) {
// 2.5.14.
context_.Say(name.source,
"The DEFAULT(NONE) clause requires that '%s' must be listed in a data-mapping clause"_err_en_US,
- symbol->name());
+ symbol.name());
}
+ } else {
+ // TODO: assertion here? or clear name.symbol?
}
}
- } // within OpenACC construct
+ }
}
Symbol *AccAttributeVisitor::ResolveAccCommonBlockName(
@@ -1810,13 +1803,11 @@ Symbol *AccAttributeVisitor::ResolveAcc(
Symbol *AccAttributeVisitor::DeclareOrMarkOtherAccessEntity(
const parser::Name &name, Symbol::Flag accFlag) {
- Symbol *prev{currScope().FindSymbol(name.source)};
- if (!name.symbol || !prev) {
+ if (name.symbol) {
+ return DeclareOrMarkOtherAccessEntity(*name.symbol, accFlag);
+ } else {
return nullptr;
- } else if (prev != name.symbol) {
- name.symbol = prev;
}
- return DeclareOrMarkOtherAccessEntity(*prev, accFlag);
}
Symbol *AccAttributeVisitor::DeclareOrMarkOtherAccessEntity(
@@ -2990,6 +2981,7 @@ void OmpAttributeVisitor::Post(const parser::Name &name) {
}
Symbol *OmpAttributeVisitor::ResolveName(const parser::Name *name) {
+ // TODO: why is the symbol not properly resolved by name resolution?
if (auto *resolvedSymbol{
name ? GetContext().scope.FindSymbol(name->source) : nullptr}) {
name->symbol = resolvedSymbol;
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 0af1c94..88cc446 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -1441,6 +1441,30 @@ public:
void Post(const parser::AccBeginLoopDirective &x) {
messageHandler().set_currStmtSource(std::nullopt);
}
+ bool Pre(const parser::OpenACCStandaloneConstruct &x) {
+ currScope().AddSourceRange(x.source);
+ return true;
+ }
+ bool Pre(const parser::OpenACCCacheConstruct &x) {
+ currScope().AddSourceRange(x.source);
+ return true;
+ }
+ bool Pre(const parser::OpenACCWaitConstruct &x) {
+ currScope().AddSourceRange(x.source);
+ return true;
+ }
+ bool Pre(const parser::OpenACCAtomicConstruct &x) {
+ currScope().AddSourceRange(x.source);
+ return true;
+ }
+ bool Pre(const parser::OpenACCEndConstruct &x) {
+ currScope().AddSourceRange(x.source);
+ return true;
+ }
+ bool Pre(const parser::OpenACCDeclarativeConstruct &x) {
+ currScope().AddSourceRange(x.source);
+ return true;
+ }
void CopySymbolWithDevice(const parser::Name *name);
@@ -1480,7 +1504,8 @@ void AccVisitor::CopySymbolWithDevice(const parser::Name *name) {
// symbols are created for the one appearing in the use_device
// clause. These new symbols have the CUDA Fortran device
// attribute.
- if (context_.languageFeatures().IsEnabled(common::LanguageFeature::CUDA)) {
+ if (context_.languageFeatures().IsEnabled(common::LanguageFeature::CUDA) &&
+ name->symbol) {
name->symbol = currScope().CopySymbol(*name->symbol);
if (auto *object{name->symbol->detailsIf<ObjectEntityDetails>()}) {
object->set_cudaDataAttr(common::CUDADataAttr::Device);
@@ -1490,15 +1515,12 @@ void AccVisitor::CopySymbolWithDevice(const parser::Name *name) {
bool AccVisitor::Pre(const parser::AccClause::UseDevice &x) {
for (const auto &accObject : x.v.v) {
+ Walk(accObject);
common::visit(
common::visitors{
[&](const parser::Designator &designator) {
if (const auto *name{
parser::GetDesignatorNameIfDataRef(designator)}) {
- Symbol *prev{currScope().FindSymbol(name->source)};
- if (prev != name->symbol) {
- name->symbol = prev;
- }
CopySymbolWithDevice(name);
} else {
if (const auto *dataRef{
@@ -1507,13 +1529,8 @@ bool AccVisitor::Pre(const parser::AccClause::UseDevice &x) {
common::Indirection<parser::ArrayElement>;
if (auto *ind{std::get_if<ElementIndirection>(&dataRef->u)}) {
const parser::ArrayElement &arrayElement{ind->value()};
- Walk(arrayElement.subscripts);
const parser::DataRef &base{arrayElement.base};
if (auto *name{std::get_if<parser::Name>(&base.u)}) {
- Symbol *prev{currScope().FindSymbol(name->source)};
- if (prev != name->symbol) {
- name->symbol = prev;
- }
CopySymbolWithDevice(name);
}
}
@@ -1537,6 +1554,7 @@ void AccVisitor::Post(const parser::OpenACCBlockConstruct &x) {
bool AccVisitor::Pre(const parser::OpenACCCombinedConstruct &x) {
PushScope(Scope::Kind::OpenACCConstruct, nullptr);
+ currScope().AddSourceRange(x.source);
return true;
}
@@ -5433,7 +5451,8 @@ void SubprogramVisitor::PushBlockDataScope(const parser::Name &name) {
}
}
-// If name is a generic, return specific subprogram with the same name.
+// If name is a generic in the same scope, return its specific subprogram with
+// the same name, if any.
Symbol *SubprogramVisitor::GetSpecificFromGeneric(const parser::Name &name) {
// Search for the name but don't resolve it
if (auto *symbol{currScope().FindSymbol(name.source)}) {
@@ -5443,6 +5462,9 @@ Symbol *SubprogramVisitor::GetSpecificFromGeneric(const parser::Name &name) {
// symbol doesn't inherit it and ruin the ability to check it.
symbol->attrs().reset(Attr::MODULE);
}
+ } else if (&symbol->owner() != &currScope() && inInterfaceBlock() &&
+ !isGeneric()) {
+ // non-generic interface shadows outer definition
} else if (auto *details{symbol->detailsIf<GenericDetails>()}) {
// found generic, want specific procedure
auto *specific{details->specific()};
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index bdb5377..2606d99 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -452,6 +452,15 @@ void SemanticsContext::UpdateScopeIndex(
}
}
+void SemanticsContext::DumpScopeIndex(llvm::raw_ostream &out) const {
+ out << "scopeIndex_:\n";
+ for (const auto &[source, scope] : scopeIndex_) {
+ out << "source '" << source.ToString() << "' -> scope " << scope
+ << "... whose source range is '" << scope.sourceRange().ToString()
+ << "'\n";
+ }
+}
+
bool SemanticsContext::IsInModuleFile(parser::CharBlock source) const {
for (const Scope *scope{&FindScope(source)}; !scope->IsGlobal();
scope = &scope->parent()) {
diff --git a/flang/lib/Utils/OpenMP.cpp b/flang/lib/Utils/OpenMP.cpp
index 2261912..15a42c3 100644
--- a/flang/lib/Utils/OpenMP.cpp
+++ b/flang/lib/Utils/OpenMP.cpp
@@ -22,8 +22,9 @@ mlir::omp::MapInfoOp createMapInfoOp(mlir::OpBuilder &builder,
mlir::Location loc, mlir::Value baseAddr, mlir::Value varPtrPtr,
llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds,
llvm::ArrayRef<mlir::Value> members, mlir::ArrayAttr membersIndex,
- uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType,
- mlir::Type retTy, bool partialMap, mlir::FlatSymbolRefAttr mapperId) {
+ mlir::omp::ClauseMapFlags mapType,
+ mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy,
+ bool partialMap, mlir::FlatSymbolRefAttr mapperId) {
if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(baseAddr.getType())) {
baseAddr = fir::BoxAddrOp::create(builder, loc, baseAddr);
@@ -42,7 +43,7 @@ mlir::omp::MapInfoOp createMapInfoOp(mlir::OpBuilder &builder,
mlir::omp::MapInfoOp op =
mlir::omp::MapInfoOp::create(builder, loc, retTy, baseAddr, varType,
- builder.getIntegerAttr(builder.getIntegerType(64, false), mapType),
+ builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(mapType),
builder.getAttr<mlir::omp::VariableCaptureKindAttr>(mapCaptureType),
varPtrPtr, members, membersIndex, bounds, mapperId,
builder.getStringAttr(name), builder.getBoolAttr(partialMap));
@@ -75,8 +76,7 @@ mlir::Value mapTemporaryValue(fir::FirOpBuilder &firOpBuilder,
firOpBuilder.setInsertionPoint(targetOp);
- llvm::omp::OpenMPOffloadMappingFlags mapFlag =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ mlir::omp::ClauseMapFlags mapFlag = mlir::omp::ClauseMapFlags::implicit;
mlir::omp::VariableCaptureKind captureKind =
mlir::omp::VariableCaptureKind::ByRef;
@@ -88,16 +88,14 @@ mlir::Value mapTemporaryValue(fir::FirOpBuilder &firOpBuilder,
if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
} else if (!fir::isa_builtin_cptr_type(eleType)) {
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+ mapFlag |= mlir::omp::ClauseMapFlags::to;
}
mlir::Value mapOp = createMapInfoOp(firOpBuilder, copyVal.getLoc(), copyVal,
/*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
/*members=*/llvm::SmallVector<mlir::Value>{},
- /*membersIndex=*/mlir::ArrayAttr{},
- static_cast<std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- mapFlag),
- captureKind, copyVal.getType());
+ /*membersIndex=*/mlir::ArrayAttr{}, mapFlag, captureKind,
+ copyVal.getType());
auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);
mlir::Region &region = targetOp.getRegion();
diff --git a/flang/test/Fir/CUDA/cuda-global-addr.mlir b/flang/test/Fir/CUDA/cuda-global-addr.mlir
index 3e50c7a..6f7816c 100644
--- a/flang/test/Fir/CUDA/cuda-global-addr.mlir
+++ b/flang/test/Fir/CUDA/cuda-global-addr.mlir
@@ -63,6 +63,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> :
// We cannot call _FortranACUFGetDeviceAddress on a constant global.
// There is no symbol for it and the call would result into an unresolved reference.
+// CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "arraysize"}
// CHECK-NOT: fir.call {{.*}}GetDeviceAddress
// -----
@@ -90,3 +91,22 @@ func.func @_QQmain() attributes {fir.bindc_name = "test"} {
// CHECK-NOT: fir.call {{.*}}GetDeviceAddress
}
+
+// -----
+
+// Check that we do not introduce call to _FortranACUFGetDeviceAddress when the
+// value has no user.
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
+ func.func @_QQmain() attributes {fir.bindc_name = "T"} {
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.address_of(@_QMcon2Ezzz) : !fir.ref<i32>
+ %2 = fir.declare %1 {data_attr = #cuf.cuda<constant>, uniq_name = "_QMcon2Ezzz"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ return
+ }
+ fir.global @_QMcon2Ezzz {data_attr = #cuf.cuda<constant>} : i32
+}
+
+// CHECK-LABEL: func.func @_QQmain()
+// CHECK: fir.address_of(@_QMcon2Ezzz) : !fir.ref<i32>
+// CHECK-NOT: fir.call {{.*}}GetDeviceAddress
diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-class.f90 b/flang/test/Fir/OpenACC/openacc-type-categories-class.f90
index e8951cc..ec97114 100644
--- a/flang/test/Fir/OpenACC/openacc-type-categories-class.f90
+++ b/flang/test/Fir/OpenACC/openacc-type-categories-class.f90
@@ -43,4 +43,4 @@ end module
! TODO: After using select type - the appropriate type category should be
! possible. Add the rest of the test once OpenACC lowering correctly handles
-! unlimited polymorhic.
+! unlimited polymorphic.
diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
index 38d5111..30ed2f0 100644
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@@ -252,7 +252,7 @@ func.func @_QPomp_target_data() {
%c0_6 = arith.constant 0 : index
%10 = arith.subi %c1024_1, %c1_5 : index
%11 = omp.map.bounds lower_bound(%c0_6 : index) upper_bound(%10 : index) extent(%c1024_1 : index) stride(%c1_5 : index) start_idx(%c1_5 : index)
- %12 = omp.map.info var_ptr(%2 : !fir.ref<!fir.array<1024xi32>>, !fir.array<1024xi32>) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) bounds(%11) -> !fir.ref<!fir.array<1024xi32>> {name = "c"}
+ %12 = omp.map.info var_ptr(%2 : !fir.ref<!fir.array<1024xi32>>, !fir.array<1024xi32>) map_clauses(always, storage) capture(ByRef) bounds(%11) -> !fir.ref<!fir.array<1024xi32>> {name = "c"}
omp.target_enter_data map_entries(%6, %9, %12 : !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>)
%c1_7 = arith.constant 1 : index
%c0_8 = arith.constant 0 : index
@@ -268,7 +268,7 @@ func.func @_QPomp_target_data() {
%c0_12 = arith.constant 0 : index
%19 = arith.subi %c1024_1, %c1_11 : index
%20 = omp.map.bounds lower_bound(%c0_12 : index) upper_bound(%19 : index) extent(%c1024_1 : index) stride(%c1_11 : index) start_idx(%c1_11 : index)
- %21 = omp.map.info var_ptr(%2 : !fir.ref<!fir.array<1024xi32>>, !fir.array<1024xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%20) -> !fir.ref<!fir.array<1024xi32>> {name = "c"}
+ %21 = omp.map.info var_ptr(%2 : !fir.ref<!fir.array<1024xi32>>, !fir.array<1024xi32>) map_clauses(storage) capture(ByRef) bounds(%20) -> !fir.ref<!fir.array<1024xi32>> {name = "c"}
%c1_13 = arith.constant 1 : index
%c0_14 = arith.constant 0 : index
%22 = arith.subi %c1024_2, %c1_13 : index
@@ -305,7 +305,7 @@ func.func @_QPomp_target_data() {
// CHECK: %[[VAL_23:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[VAL_24:.*]] = llvm.mlir.constant(1023 : index) : i64
// CHECK: %[[VAL_25:.*]] = omp.map.bounds lower_bound(%[[VAL_23]] : i64) upper_bound(%[[VAL_24]] : i64) extent(%[[VAL_10]] : i64) stride(%[[VAL_22]] : i64) start_idx(%[[VAL_22]] : i64)
-// CHECK: %[[VAL_26:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) bounds(%[[VAL_25]]) -> !llvm.ptr {name = "c"}
+// CHECK: %[[VAL_26:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(always, storage) capture(ByRef) bounds(%[[VAL_25]]) -> !llvm.ptr {name = "c"}
// CHECK: omp.target_enter_data map_entries(%[[VAL_16]], %[[VAL_21]], %[[VAL_26]] : !llvm.ptr, !llvm.ptr, !llvm.ptr)
// CHECK: %[[VAL_27:.*]] = llvm.mlir.constant(1 : index) : i64
// CHECK: %[[VAL_28:.*]] = llvm.mlir.constant(0 : index) : i64
@@ -321,7 +321,7 @@ func.func @_QPomp_target_data() {
// CHECK: %[[VAL_38:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[VAL_39:.*]] = llvm.mlir.constant(1023 : index) : i64
// CHECK: %[[VAL_40:.*]] = omp.map.bounds lower_bound(%[[VAL_38]] : i64) upper_bound(%[[VAL_39]] : i64) extent(%[[VAL_10]] : i64) stride(%[[VAL_37]] : i64) start_idx(%[[VAL_37]] : i64)
-// CHECK: %[[VAL_41:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%[[VAL_40]]) -> !llvm.ptr {name = "c"}
+// CHECK: %[[VAL_41:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(storage) capture(ByRef) bounds(%[[VAL_40]]) -> !llvm.ptr {name = "c"}
// CHECK: %[[VAL_42:.*]] = llvm.mlir.constant(1 : index) : i64
// CHECK: %[[VAL_43:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[VAL_44:.*]] = llvm.mlir.constant(1023 : index) : i64
diff --git a/flang/test/Lower/OpenACC/acc-declare-common-in-function.f90 b/flang/test/Lower/OpenACC/acc-declare-common-in-function.f90
new file mode 100644
index 0000000..5038f71
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-declare-common-in-function.f90
@@ -0,0 +1,40 @@
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+! Verify that a COMMON block declared with OpenACC declare inside a function
+! is lowered as a global declare (acc.global_ctor/dtor) rather than a
+! structured declare.
+
+program p
+ implicit none
+ real :: pi
+ integer :: i
+ common /COM/ pi
+!$acc declare copyin(/COM/)
+ data pi/0.0/
+
+! CHECK-DAG: acc.global_ctor @{{.*}}_acc_ctor {
+! CHECK-DAG: %[[ADDR0:.*]] = fir.address_of(@{{.*}}) {acc.declare = #acc.declare<dataClause = acc_copyin>} : {{.*}}
+! CHECK-DAG: acc.declare_enter dataOperands(%{{.*}} : {{.*}})
+! CHECK-DAG: acc.terminator
+! CHECK-DAG: }
+
+! CHECK-DAG: acc.global_dtor @{{.*}}_acc_dtor {
+! CHECK-DAG: %[[ADDR1:.*]] = fir.address_of(@{{.*}}) {acc.declare = #acc.declare<dataClause = acc_copyin>} : !fir.ref<tuple<f32>>
+! CHECK-DAG: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[ADDR1]] : !fir.ref<tuple<f32>>) -> !fir.ref<tuple<f32>> {dataClause = #acc<data_clause acc_copyin>, {{.*}}}
+! CHECK-DAG: acc.declare_exit dataOperands(%[[GDP]] : !fir.ref<tuple<f32>>)
+! CHECK-DAG: acc.delete accPtr(%[[GDP]] : !fir.ref<tuple<f32>>) {dataClause = #acc<data_clause acc_copyin>{{.*}}}
+! CHECK-DAG: acc.terminator
+! CHECK-DAG: }
+
+contains
+
+ subroutine s()
+ implicit none
+ real :: pi
+ common /COM/ pi
+!$acc declare copyin(/COM/)
+ end subroutine s
+
+end program p
+
+
diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90
index 39f9738..126f341 100644
--- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90
+++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90
@@ -23,9 +23,9 @@ program test_default_implicit_firstprivate
!CHECK: %[[VAL_4:.*]] = fir.declare %{{.*}} {uniq_name = "_QFEk"} : (!fir.ref<i32>) -> !fir.ref<i32>
!CHECK: %[[VAL_5:.*]] = fir.declare %{{.*}} {uniq_name = "_QFExdgfx"} : (!fir.ref<i32>) -> !fir.ref<i32>
!CHECK: %[[VAL_6:.*]] = fir.declare %{{.*}} {uniq_name = "_QFExfpvx"} : (!fir.ref<i32>) -> !fir.ref<i32>
-!CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "i"}
-!CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_3]] : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "j"}
-!CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "k"}
+!CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "i"}
+!CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_3]] : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "j"}
+!CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "k"}
!CHECK: %[[VAL_10:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xi32>>>
!CHECK: %[[VAL_11:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xi32>>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_10]] : !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xi32>>>) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xi32>>> {name = ""}
!CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xi32>>>>, !fir.box<!fir.heap<!fir.array<?x?x?xi32>>>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_11]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xi32>>>> {name = "allocarr"}
diff --git a/flang/test/Lower/OpenMP/common-block-map.f90 b/flang/test/Lower/OpenMP/common-block-map.f90
index a0a1b1f..7c690c9 100644
--- a/flang/test/Lower/OpenMP/common-block-map.f90
+++ b/flang/test/Lower/OpenMP/common-block-map.f90
@@ -36,7 +36,7 @@ end
!CHECK: %[[CONV:.*]] = fir.convert %[[COORD]] : (!fir.ref<i8>) -> !fir.ref<i32>
!CHECK: %[[CB_MEMBER_2:.*]]:2 = hlfir.declare %[[CONV]] storage(%[[COMMON_BLOCK]][4]) {uniq_name = "_QFmap_mix_of_membersEvar2"} : (!fir.ref<i32>, !fir.ref<!fir.array<8xi8>>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[MAP_EXP:.*]] = omp.map.info var_ptr(%[[CB_MEMBER_2]]#1 : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "var2"}
-!CHECK: %[[MAP_IMP:.*]] = omp.map.info var_ptr(%[[CB_MEMBER_1]]#1 : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "var1"}
+!CHECK: %[[MAP_IMP:.*]] = omp.map.info var_ptr(%[[CB_MEMBER_1]]#1 : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "var1"}
!CHECK: omp.target map_entries(%[[MAP_EXP]] -> %[[ARG_EXP:.*]], %[[MAP_IMP]] -> %[[ARG_IMP:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
!CHECK: %[[EXP_MEMBER:.*]]:2 = hlfir.declare %[[ARG_EXP]] {uniq_name = "_QFmap_mix_of_membersEvar2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[IMP_MEMBER:.*]]:2 = hlfir.declare %[[ARG_IMP]] {uniq_name = "_QFmap_mix_of_membersEvar1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
diff --git a/flang/test/Lower/OpenMP/declare-mapper.f90 b/flang/test/Lower/OpenMP/declare-mapper.f90
index 3d4d0da..c389d0f 100644
--- a/flang/test/Lower/OpenMP/declare-mapper.f90
+++ b/flang/test/Lower/OpenMP/declare-mapper.f90
@@ -80,7 +80,7 @@ subroutine declare_mapper_2
!CHECK: %[[VAL_8:.*]] = omp.map.bounds lower_bound(%[[VAL_6]] : index) upper_bound(%[[VAL_7]] : index) extent(%[[VAL_2]] : index) stride(%[[VAL_5]] : index) start_idx(%[[VAL_5]] : index)
!CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !fir.ref<!fir.array<250xf32>>, !fir.array<250xf32>) map_clauses(tofrom) capture(ByRef) bounds(%[[VAL_8]]) -> !fir.ref<!fir.array<250xf32>> {name = "v%[[VAL_10:.*]]"}
!CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_1]]#0{"temp"} : (!fir.ref<[[MY_TYPE]]>) -> !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
- !CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>, !fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> {name = "v%[[VAL_13:.*]]"}
+ !CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>, !fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>) map_clauses(storage) capture(ByRef) -> !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> {name = "v%[[VAL_13:.*]]"}
!CHECK: %[[VAL_14:.*]] = omp.map.info var_ptr(%[[VAL_1]]#1 : !fir.ref<[[MY_TYPE]]>, [[MY_TYPE]]) map_clauses(tofrom) capture(ByRef) members(%[[VAL_9]], %[[VAL_12]] : [3], [1] : !fir.ref<!fir.array<250xf32>>, !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<[[MY_TYPE]]> {name = "v", partial_map = true}
!CHECK: omp.declare_mapper.info map_entries(%[[VAL_14]], %[[VAL_9]], %[[VAL_12]] : !fir.ref<[[MY_TYPE]]>, !fir.ref<!fir.array<250xf32>>, !fir.ref<!fir.type<_QFdeclare_mapper_2Tmy_type{num_vals:i32,values:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>)
!CHECK: }
diff --git a/flang/test/Lower/OpenMP/defaultmap.f90 b/flang/test/Lower/OpenMP/defaultmap.f90
index 0b26f5d..b9c902f 100644
--- a/flang/test/Lower/OpenMP/defaultmap.f90
+++ b/flang/test/Lower/OpenMP/defaultmap.f90
@@ -5,7 +5,7 @@ subroutine defaultmap_allocatable_present()
implicit none
integer, dimension(:), allocatable :: arr
-! CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(implicit, present, exit_release_or_enter_alloc) capture(ByRef) var_ptr_ptr({{.*}}) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+! CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(implicit, present) capture(ByRef) var_ptr_ptr({{.*}}) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
! CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members({{.*}}) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "arr"}
!$omp target defaultmap(present: allocatable)
arr(1) = 10
@@ -32,7 +32,7 @@ subroutine defaultmap_all_default()
integer :: aggregate(16)
integer :: scalar_int
-! CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "scalar_int"}
+! CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "scalar_int"}
! CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr({{.*}}) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
! CHECK: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members({{.*}}) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "arr"}
! CHECK: %[[MAP_4:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.array<16xi32>>, !fir.array<16xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{.*}}) -> !fir.ref<!fir.array<16xi32>> {name = "aggregate"}
@@ -54,7 +54,7 @@ subroutine defaultmap_pointer_to()
! CHECK-FPRIV: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, i32) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr({{.*}}) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
! CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.box<!fir.ptr<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members({{.*}}) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {name = "arr_ptr"}
! CHECK-FPRIV: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(to) capture(ByCopy) -> !fir.ref<i32>
-! CHECK-NO-FPRIV: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "scalar_int"}
+! CHECK-NO-FPRIV: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "scalar_int"}
!$omp target defaultmap(to: pointer)
arr_ptr(1) = scalar_int + 20
!$omp end target
diff --git a/flang/test/Lower/OpenMP/has_device_addr-mapinfo.f90 b/flang/test/Lower/OpenMP/has_device_addr-mapinfo.f90
index 8d8c043..e7bced4 100644
--- a/flang/test/Lower/OpenMP/has_device_addr-mapinfo.f90
+++ b/flang/test/Lower/OpenMP/has_device_addr-mapinfo.f90
@@ -17,7 +17,7 @@ end
! Check that the map.info for `a` only takes a single parameter.
-!CHECK-DAG: %[[MAP_A:[0-9]+]] = "omp.map.info"(%[[STORAGE_A:[0-9#]+]]) <{map_capture_type = #omp<variable_capture_kind(ByRef)>, map_type = 517 : ui64, name = "a", operandSegmentSizes = array<i32: 1, 0, 0, 0>, partial_map = false, var_type = !fir.box<!fir.array<?xi32>>}> : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>>
-!CHECK-DAG: %[[MAP_T:[0-9]+]] = "omp.map.info"(%[[STORAGE_T:[0-9#]+]]) <{map_capture_type = #omp<variable_capture_kind(ByRef)>, map_type = 2 : ui64, name = "t", operandSegmentSizes = array<i32: 1, 0, 0, 0>, partial_map = false, var_type = i32}> : (!fir.ref<i32>) -> !fir.ref<i32>
+!CHECK-DAG: %[[MAP_A:[0-9]+]] = "omp.map.info"(%[[STORAGE_A:[0-9#]+]]) <{map_capture_type = #omp<variable_capture_kind(ByRef)>, map_type = #omp<clause_map_flags to|always|implicit>, name = "a", operandSegmentSizes = array<i32: 1, 0, 0, 0>, partial_map = false, var_type = !fir.box<!fir.array<?xi32>>}> : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>>
+!CHECK-DAG: %[[MAP_T:[0-9]+]] = "omp.map.info"(%[[STORAGE_T:[0-9#]+]]) <{map_capture_type = #omp<variable_capture_kind(ByRef)>, map_type = #omp<clause_map_flags from>, name = "t", operandSegmentSizes = array<i32: 1, 0, 0, 0>, partial_map = false, var_type = i32}> : (!fir.ref<i32>) -> !fir.ref<i32>
!CHECK: "omp.target"(%[[MAP_A]], %[[MAP_T]])
diff --git a/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90 b/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90
index ab2cdf3..76dba67 100644
--- a/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90
+++ b/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90
@@ -11,7 +11,7 @@
!HLFIRDIALECT: %[[B_DECLARE:.*]]:2 = hlfir.declare %[[B_ALLOCA]](%[[B_SHAPE]]) {uniq_name = "_QFlocal_variable_intrinsic_sizeEb"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
!HLFIRDIALECT: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}} : index) upper_bound({{.*}} : index) extent({{.*}} : index) stride({{.*}} : index) start_idx({{.*}} : index) {stride_in_bytes = true}
!HLFIRDIALECT: %[[MAP_DATA_B:.*]] = omp.map.info var_ptr(%[[B_DECLARE]]#1 : !fir.ref<!fir.array<?xf32>>, f32) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xf32>> {name = "b"}
-!HLFIRDIALECT: %[[MAP_DATA_SZ:.*]] = omp.map.info var_ptr(%[[SZ_DATA]] : !fir.ref<index>, index) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<index> {name = ""}
+!HLFIRDIALECT: %[[MAP_DATA_SZ:.*]] = omp.map.info var_ptr(%[[SZ_DATA]] : !fir.ref<index>, index) map_clauses(implicit) capture(ByCopy) -> !fir.ref<index> {name = ""}
!HLFIRDIALECT: omp.target map_entries(%[[MAP_DATA_B]] -> %[[ARG1:.*]], %[[MAP_DATA_SZ]] -> %[[ARG2:.*]] : !fir.ref<!fir.array<?xf32>>, !fir.ref<index>) {
!HLFIRDIALECT: %[[SZ_LD:.*]] = fir.load %[[ARG2]] : !fir.ref<index>
!HLFIRDIALECT: %[[SZ_CONV:.*]] = fir.convert %[[SZ_LD]] : (index) -> i64
diff --git a/flang/test/Lower/OpenMP/optional-argument-map-2.f90 b/flang/test/Lower/OpenMP/optional-argument-map-2.f90
index a774407..791d509 100644
--- a/flang/test/Lower/OpenMP/optional-argument-map-2.f90
+++ b/flang/test/Lower/OpenMP/optional-argument-map-2.f90
@@ -96,7 +96,7 @@ end module mod
! CHECK-NO-FPRIV: }
! CHECK-NO-FPRIV: %[[VAL_13:.*]] = arith.subi %[[VAL_14:.*]]#0, %[[VAL_10]] : index
! CHECK-NO-FPRIV: %[[VAL_15:.*]] = omp.map.bounds lower_bound(%[[VAL_9]] : index) upper_bound(%[[VAL_13]] : index) extent(%[[VAL_14]]#0 : index) stride(%[[VAL_14]]#1 : index) start_idx(%[[VAL_9]] : index) {stride_in_bytes = true}
-! CHECK-NO-FPRIV: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref<!fir.char<1,?>>, !fir.char<1,?>) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) bounds(%[[VAL_15]]) -> !fir.ref<!fir.char<1,?>> {name = "a"}
+! CHECK-NO-FPRIV: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref<!fir.char<1,?>>, !fir.char<1,?>) map_clauses(implicit) capture(ByCopy) bounds(%[[VAL_15]]) -> !fir.ref<!fir.char<1,?>> {name = "a"}
! CHECK-NO-FPRIV: fir.store %[[ARG0]] to %[[VAL_0]] : !fir.ref<!fir.boxchar<1>>
! CHECK-NO-FPRIV: %[[VAL_17:.*]] = arith.constant 0 : index
! CHECK-NO-FPRIV: %[[VAL_18:.*]] = arith.constant 1 : index
diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90
index 1aef64a..26bd62e 100644
--- a/flang/test/Lower/OpenMP/target.f90
+++ b/flang/test/Lower/OpenMP/target.f90
@@ -69,7 +69,7 @@ subroutine omp_target_enter_mt
!CHECK: %[[BOUNDS_1:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}})
!CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr(%{{.*}}) map_clauses(to) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.ref<!fir.array<1024xi32>> {name = "b"}
!CHECK: %[[BOUNDS_2:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}})
- !CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.array<1024xi32>> {name = "c"}
+ !CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(always, storage) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.array<1024xi32>> {name = "c"}
!CHECK: %[[BOUNDS_3:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}})
!CHECK: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(to) capture(ByRef) bounds(%[[BOUNDS_3]]) -> !fir.ref<!fir.array<1024xi32>> {name = "d"}
!CHECK: omp.target_enter_data map_entries(%[[MAP_0]], %[[MAP_1]], %[[MAP_2]], %[[MAP_3]] : !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>)
@@ -150,9 +150,9 @@ subroutine omp_target_exit_mt
!CHECK: %[[BOUNDS_1:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}})
!CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(from) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.ref<!fir.array<1024xi32>> {name = "b"}
!CHECK: %[[BOUNDS_2:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}})
- !CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.array<1024xi32>> {name = "c"}
+ !CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(storage) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.array<1024xi32>> {name = "c"}
!CHECK: %[[BOUNDS_3:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}})
- !CHECK: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(always, delete) capture(ByRef) bounds(%[[BOUNDS_3]]) -> !fir.ref<!fir.array<1024xi32>> {name = "d"}
+ !CHECK: %[[MAP_3:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(always, delete, storage) capture(ByRef) bounds(%[[BOUNDS_3]]) -> !fir.ref<!fir.array<1024xi32>> {name = "d"}
!CHECK: %[[BOUNDS_4:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}})
!CHECK: %[[MAP_4:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(from) capture(ByRef) bounds(%[[BOUNDS_4]]) -> !fir.ref<!fir.array<1024xi32>> {name = "e"}
!CHECK: omp.target_exit_data map_entries(%[[MAP_0]], %[[MAP_1]], %[[MAP_2]], %[[MAP_3]], %[[MAP_4]] : !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>)
@@ -482,7 +482,7 @@ subroutine omp_target_implicit_bounds(n)
integer :: a(n)
!CHECK: %[[VAL_14:.*]] = omp.map.bounds lower_bound(%c0{{.*}} : index) upper_bound(%[[UB]] : index) extent(%[[VAL_7]] : index) stride(%c1{{.*}} : index) start_idx(%c1{{.*}} : index)
!CHECK: %[[VAL_15:.*]] = omp.map.info var_ptr(%[[VAL_10]]#1 : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[VAL_14]]) -> !fir.ref<!fir.array<?xi32>> {name = "a"}
- !CHECK: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_COPY]] : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = ""}
+ !CHECK: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_COPY]] : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = ""}
!CHECK: omp.target map_entries(%[[VAL_15]] -> %[[VAL_17:.*]], %[[VAL_16]] -> %[[VAL_18:.*]] : !fir.ref<!fir.array<?xi32>>, !fir.ref<i32>) {
!$omp target
!CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
@@ -642,8 +642,8 @@ subroutine target_unstructured
integer :: i = 1
!CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtarget_unstructuredEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
integer :: j = 11
- !CHECK-NO-FPRIV: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]]#1 : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "i"}
- !CHECK-NO-FPRIV: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "j"}
+ !CHECK-NO-FPRIV: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]]#1 : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "i"}
+ !CHECK-NO-FPRIV: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32> {name = "j"}
!CHECK-NO-FPRIV: omp.target map_entries(%[[VAL_4]] -> %[[VAL_6:.*]], %[[VAL_5]] -> %[[VAL_7:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
!CHECK-FPRIV: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]]#0 : !fir.ref<i32>, i32) map_clauses(to) capture(ByCopy) -> !fir.ref<i32>
!CHECK-FPRIV: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_3]]#0 : !fir.ref<i32>, i32) map_clauses(to) capture(ByCopy) -> !fir.ref<i32>
diff --git a/flang/test/Preprocessing/bug164470.cuf b/flang/test/Preprocessing/bug164470.cuf
new file mode 100644
index 0000000..3e959f4
--- /dev/null
+++ b/flang/test/Preprocessing/bug164470.cuf
@@ -0,0 +1,6 @@
+!RUN: %flang_fc1 -x cuda -fdebug-unparse %s 2>&1 | FileCheck %s
+!CHECK: ATTRIBUTES(DEVICE) FUNCTION foo()
+!@cuf attributes(device) &
+function foo()
+ foo = 1.
+end
diff --git a/flang/test/Semantics/OpenACC/bug1583.f90 b/flang/test/Semantics/OpenACC/bug1583.f90
new file mode 100644
index 0000000..7778d46
--- /dev/null
+++ b/flang/test/Semantics/OpenACC/bug1583.f90
@@ -0,0 +1,23 @@
+! RUN: %python %S/../test_symbols.py %s %flang_fc1 -fopenacc
+!DEF: /m Module
+module m
+ !DEF: /m/t PUBLIC DerivedType
+ type :: t
+ !DEF: /m/t/c ALLOCATABLE ObjectEntity REAL(4)
+ real, allocatable :: c(:)
+ end type
+contains
+ !DEF: /m/sub PUBLIC (Subroutine) Subprogram
+ !DEF: /m/sub/v ObjectEntity TYPE(t)
+ subroutine sub (v)
+ !REF: /m/t
+ !REF: /m/sub/v
+ type(t) :: v
+!$acc host_data use_device(v%c)
+ !DEF: /foo EXTERNAL (Subroutine) ProcEntity
+ !REF: /m/sub/v
+ !REF: /m/t/c
+ call foo(v%c)
+!$acc end host_data
+ end subroutine
+end module
diff --git a/flang/test/Semantics/bug164303.f90 b/flang/test/Semantics/bug164303.f90
new file mode 100644
index 0000000..c356c07
--- /dev/null
+++ b/flang/test/Semantics/bug164303.f90
@@ -0,0 +1,31 @@
+!RUN: %flang -fc1 -fsyntax-only %s 2>&1 | FileCheck --allow-empty %s
+module foo_mod
+ use, intrinsic :: iso_fortran_env
+ use, intrinsic :: iso_c_binding
+ implicit none
+
+ interface new_foo
+ procedure :: foo_ctor
+ end interface
+
+contains
+
+function foo_ctor(options) result(retval)
+ implicit none
+ integer, intent(in) :: options
+ integer :: retval
+
+ interface
+!CHECK-NOT: error:
+ subroutine new_foo(f, opt) bind(c, name='new_foo')
+ import
+ implicit none
+ integer, intent(inout) :: f
+ integer(c_int), intent(in) :: opt
+ end subroutine
+ end interface
+
+ call new_foo(retval, options)
+end function
+
+end module
diff --git a/flang/test/Transforms/DoConcurrent/map_shape_info.f90 b/flang/test/Transforms/DoConcurrent/map_shape_info.f90
index 3dca134..40f66c1 100644
--- a/flang/test/Transforms/DoConcurrent/map_shape_info.f90
+++ b/flang/test/Transforms/DoConcurrent/map_shape_info.f90
@@ -30,12 +30,12 @@ end program do_concurrent_shape
! CHECK: %[[DIM0_EXT_MAP:.*]] = omp.map.info
! CHECK-SAME: var_ptr(%[[DIM0_EXT]] : !fir.ref<index>, index)
-! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: map_clauses(implicit)
! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QFEa.extent.dim0"}
! CHECK: %[[DIM1_EXT_MAP:.*]] = omp.map.info
! CHECK-SAME: var_ptr(%[[DIM1_EXT]] : !fir.ref<index>, index)
-! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: map_clauses(implicit)
! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QFEa.extent.dim1"}
! CHECK: omp.target host_eval({{.*}}) map_entries(
@@ -79,12 +79,12 @@ end subroutine do_concurrent_shape_shift
! CHECK: %[[DIM0_STRT_MAP:.*]] = omp.map.info
! CHECK-SAME: var_ptr(%[[DIM0_STRT]] : !fir.ref<index>, index)
-! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: map_clauses(implicit)
! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QF{{.*}}Ea.start_idx.dim0"}
! CHECK: %[[DIM0_EXT_MAP:.*]] = omp.map.info
! CHECK-SAME: var_ptr(%[[DIM0_EXT]] : !fir.ref<index>, index)
-! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: map_clauses(implicit)
! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QF{{.*}}Ea.extent.dim0"}
! CHECK: omp.target host_eval({{.*}}) map_entries(
diff --git a/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90
index b6b2136..af48eb4 100644
--- a/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90
+++ b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90
@@ -24,7 +24,7 @@ end subroutine test_non_refernece
! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
! CHECK: %[[DIM_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
-! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: map_clauses(implicit)
! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = ""}
diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir
index 04e60ca..aef72e4 100644
--- a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir
@@ -12,10 +12,10 @@
// CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(to) capture(ByRef) -> !fir.ref<index> {name = "ub"}
// CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(to) capture(ByRef) -> !fir.ref<index> {name = "step"}
// CHECK: %[[VAL_6:.*]] = omp.map.info var_ptr(%[[ARG3:.*]] : !fir.ref<index>, index) map_clauses(tofrom) capture(ByRef) -> !fir.ref<index> {name = "addr"}
-// CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "lb"}
-// CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "ub"}
-// CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "step"}
-// CHECK: %[[VAL_10:.*]] = omp.map.info var_ptr(%[[ARG3]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "addr"}
+// CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "lb"}
+// CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "ub"}
+// CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "step"}
+// CHECK: %[[VAL_10:.*]] = omp.map.info var_ptr(%[[ARG3]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "addr"}
// CHECK: omp.target_data map_entries(%[[VAL_3]], %[[VAL_4]], %[[VAL_5]], %[[VAL_6]] : !fir.ref<index>, !fir.ref<index>, !fir.ref<index>, !fir.ref<index>) {
// CHECK: %[[VAL_11:.*]] = fir.alloca index
// CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<index>, index) map_clauses(from) capture(ByRef) -> !fir.ref<index> {name = "__flang_workdistribute_from"}
diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir
index 062eb70..25f0350 100644
--- a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir
@@ -12,10 +12,10 @@
// CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(to) capture(ByRef) -> !fir.ref<index> {name = "ub"}
// CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(to) capture(ByRef) -> !fir.ref<index> {name = "step"}
// CHECK: %[[VAL_6:.*]] = omp.map.info var_ptr(%[[ARG3:.*]] : !fir.ref<index>, index) map_clauses(tofrom) capture(ByRef) -> !fir.ref<index> {name = "addr"}
-// CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "lb"}
-// CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "ub"}
-// CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "step"}
-// CHECK: %[[VAL_10:.*]] = omp.map.info var_ptr(%[[ARG3]] : !fir.ref<index>, index) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref<index> {name = "addr"}
+// CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "lb"}
+// CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "ub"}
+// CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "step"}
+// CHECK: %[[VAL_10:.*]] = omp.map.info var_ptr(%[[ARG3]] : !fir.ref<index>, index) map_clauses(storage) capture(ByRef) -> !fir.ref<index> {name = "addr"}
// CHECK: omp.target_data map_entries(%[[VAL_3]], %[[VAL_4]], %[[VAL_5]], %[[VAL_6]] : !fir.ref<index>, !fir.ref<index>, !fir.ref<index>, !fir.ref<index>) {
// CHECK: %[[VAL_11:.*]] = fir.alloca index
// CHECK: %[[VAL_12:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<index>, index) map_clauses(from) capture(ByRef) -> !fir.ref<index> {name = "__flang_workdistribute_from"}
diff --git a/flang/test/Transforms/omp-map-info-finalization.fir b/flang/test/Transforms/omp-map-info-finalization.fir
index 7bc0ae4..b30a2fc 100644
--- a/flang/test/Transforms/omp-map-info-finalization.fir
+++ b/flang/test/Transforms/omp-map-info-finalization.fir
@@ -257,7 +257,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi
%1 = omp.map.bounds lower_bound(%c1_15 : index) upper_bound(%c1_15 : index) extent(%c1_15 : index) stride(%c1_15 : index) start_idx(%c1_15 : index) {stride_in_bytes = true}
%2 = fir.coordinate_of %0#0, vertexes : (!fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>
%3 = omp.map.bounds lower_bound(%c1_15 : index) upper_bound(%c1_15 : index) extent(%c1_15 : index) stride(%c1_15 : index) start_idx(%c1_15 : index) {stride_in_bytes = true}
- %4 = omp.map.info var_ptr(%2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>, !fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%3) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>> {name = "alloca_dtype%vertexes(2_8)%vertexy"}
+ %4 = omp.map.info var_ptr(%2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>, !fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) map_clauses(storage) capture(ByRef) bounds(%3) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>> {name = "alloca_dtype%vertexes(2_8)%vertexy"}
%5 = fir.load %2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>
%c2_i64 = arith.constant 2 : i64
%c1_20 = arith.constant 1 : index
@@ -266,7 +266,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi
%8 = fir.coordinate_of %5, %7 : (!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, index) -> !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
%9 = fir.coordinate_of %8, vertexy : (!fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
%10 = omp.map.info var_ptr(%9 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%1) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "alloca_dtype%vertexes(2_8)%vertexy"}
- %11 = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>>, !fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) members(%4, %10 : [1], [1,2] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>> {name = "alloca_dtype", partial_map = true}
+ %11 = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>>, !fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>) map_clauses(storage) capture(ByRef) members(%4, %10 : [1], [1,2] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>> {name = "alloca_dtype", partial_map = true}
omp.target map_entries(%11 -> %arg1 : !fir.ref<!fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box<!fir.heap<!fir.array<?x!fir.type<_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>,array_i:!fir.array<10xi32>}>>) {
omp.terminator
}
@@ -277,7 +277,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi
// CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ARG0]] {{.*}} : (!fir.ref<!fir.type<[[REC_TY]]>>) -> (!fir.ref<!fir.type<[[REC_TY]]>>, !fir.ref<!fir.type<[[REC_TY]]>>)
// CHECK: %[[DESC_1:.*]] = fir.coordinate_of %[[DECLARE]]#0, vertexes : (!fir.ref<!fir.type<[[REC_TY]]>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2:_QFmaptype_nested_derived_type_member_idxTvertexes{test:i32,vertexx:!fir.box<!fir.heap<!fir.array<\?xi32>>>,vertexy:!fir.box<!fir.heap<!fir.array<\?xi32>>>}]]>>>>>
// CHECK: %[[BASE_ADDR_1:.*]] = fir.box_offset %[[DESC_1]] base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>
-// CHECK: %[[BASE_ADDR_MAP_1:.*]] = omp.map.info var_ptr(%[[DESC_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.type<[[REC_TY2]]>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR_1]] : !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>> {{.*}}
+// CHECK: %[[BASE_ADDR_MAP_1:.*]] = omp.map.info var_ptr(%[[DESC_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.type<[[REC_TY2]]>) map_clauses(storage) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR_1]] : !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>> {{.*}}
// CHECK: %[[DESC_MAP_1:.*]] = omp.map.info var_ptr(%[[DESC_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>) map_clauses(to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>> {{.*}}
// CHECK: %[[DESC_LD_1:.*]] = fir.load %[[DESC_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>
// CHECK: %[[MEMBER_ACCESS_1:.*]] = fir.coordinate_of %[[DESC_LD_1]], %{{.*}} : (!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, index) -> !fir.ref<!fir.type<[[REC_TY2]]>>
@@ -285,7 +285,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref<!fir.box<!fir.heap<!fi
// CHECK: %[[BASE_ADDR_2:.*]] = fir.box_offset %[[DESC_2]] base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
// CHECK: %[[BASE_ADDR_MAP_2:.*]] = omp.map.info var_ptr(%[[DESC_2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR_2]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {{.*}}
// CHECK: %[[DESC_MAP_2:.*]] = omp.map.info var_ptr(%[[DESC_2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {{.*}}
-// CHECK: %[[TOP_PARENT_MAP:.*]] = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.type<[[REC_TY]]>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) members(%6, %5, %14, %13 : [1], [1, 0], [1, 0, 2], [1, 0, 2, 0] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.type<[[REC_TY]]>> {{{.*}} partial_map = true}
+// CHECK: %[[TOP_PARENT_MAP:.*]] = omp.map.info var_ptr(%0#1 : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.type<[[REC_TY]]>) map_clauses(storage) capture(ByRef) members(%6, %5, %14, %13 : [1], [1, 0], [1, 0, 2], [1, 0, 2, 0] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.type<[[REC_TY]]>> {{{.*}} partial_map = true}
// CHECK: omp.target map_entries(%[[TOP_PARENT_MAP]] -> %{{.*}}, %[[DESC_MAP_1]] -> %{{.*}}, %[[BASE_ADDR_MAP_1]] -> %{{.*}}, %[[DESC_MAP_2]] -> %{{.*}}, %[[BASE_ADDR_MAP_2]] -> %{{.*}} : !fir.ref<!fir.type<[[REC_TY]]>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<[[REC_TY2]]>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x!fir.type<[[REC_TY2]]>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
// -----
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 8bf6c44..714120a 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -945,6 +945,7 @@ if(LLVM_LIBC_FULL_BUILD)
# arpa/inet.h entrypoints
libc.src.arpa.inet.htonl
libc.src.arpa.inet.htons
+ libc.src.arpa.inet.inet_aton
libc.src.arpa.inet.ntohl
libc.src.arpa.inet.ntohs
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index dffccba..f6bbb34 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -1077,6 +1077,7 @@ if(LLVM_LIBC_FULL_BUILD)
# arpa/inet.h entrypoints
libc.src.arpa.inet.htonl
libc.src.arpa.inet.htons
+ libc.src.arpa.inet.inet_aton
libc.src.arpa.inet.ntohl
libc.src.arpa.inet.ntohs
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index b4ab073..7a8d74a 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1113,6 +1113,7 @@ if(LLVM_LIBC_FULL_BUILD)
# arpa/inet.h entrypoints
libc.src.arpa.inet.htonl
libc.src.arpa.inet.htons
+ libc.src.arpa.inet.inet_aton
libc.src.arpa.inet.ntohl
libc.src.arpa.inet.ntohs
@@ -1373,6 +1374,11 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.wchar.wcstombs
libc.src.wchar.wcsrtombs
libc.src.wchar.wcsnrtombs
+
+ # nl_types.h entrypoints
+ libc.src.nl_types.catopen
+ libc.src.nl_types.catclose
+ libc.src.nl_types.catgets
)
endif()
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index 0573851..d0f62eb 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -19,6 +19,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.malloc
libc.include.math
libc.include.netinet_in
+ libc.include.nl_types
libc.include.poll
libc.include.pthread
libc.include.sched
diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst
index aeeaf17..4f8ac22 100644
--- a/libc/docs/dev/undefined_behavior.rst
+++ b/libc/docs/dev/undefined_behavior.rst
@@ -156,3 +156,10 @@ parsed as normal. For l64a it's unspecified what happens if the input value is
negative. For LLVM-libc, all inputs to l64a are treated as unsigned 32 bit ints.
Additionally, the return of l64a is in a thread-local buffer that's overwritten
on each call.
+
+`inet_aton` and Non-Standard Binary Integers
+--------------------------------------------
+The current implementation of the `inet_aton` function utilizes the same code
+as `strtol` to parse IPv4 numbers-and-dots notations. This approach may permit
+the use of binary integers (prefixed with 0b), which is not supported by the
+standard.
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index a5c1878..09f169b 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -771,6 +771,14 @@ add_header_macro(
.llvm-libc-macros.poll-macros
)
+add_header_macro(
+ nl_types
+ ../libc/include/nl_types.yaml
+ nl_types.h
+ DEPENDS
+ .llvm-libc-types.nl_catd
+ )
+
# UEFI spec references "Uefi.h" so we use that name for compatibility
add_header_macro(
uefi
diff --git a/libc/include/arpa/inet.yaml b/libc/include/arpa/inet.yaml
index 10cd56d..6e06290 100644
--- a/libc/include/arpa/inet.yaml
+++ b/libc/include/arpa/inet.yaml
@@ -1,7 +1,8 @@
header: arpa/inet.h
header_template: inet.h.def
macros: []
-types: []
+types:
+ - type_name: in_addr
enums: []
objects: []
functions:
@@ -17,6 +18,13 @@ functions:
return_type: uint16_t
arguments:
- type: uint16_t
+ - name: inet_aton
+ standards:
+ - llvm_libc_ext
+ return_type: int
+ arguments:
+ - type: const char *
+ - type: in_addr *
- name: ntohl
standards:
- POSIX
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 5f506c4..a428a0e 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -46,6 +46,7 @@ add_header(mbstate_t HDR mbstate_t.h)
add_header(mode_t HDR mode_t.h)
add_header(mtx_t HDR mtx_t.h DEPENDS .__futex_word .__mutex_type)
add_header(nfds_t HDR nfds_t.h)
+add_header(nl_catd HDR nl_catd.h)
add_header(nlink_t HDR nlink_t.h)
add_header(off_t HDR off_t.h)
add_header(once_flag HDR once_flag.h DEPENDS .__futex_word)
diff --git a/libc/include/llvm-libc-types/nl_catd.h b/libc/include/llvm-libc-types/nl_catd.h
new file mode 100644
index 0000000..ccdb020
--- /dev/null
+++ b/libc/include/llvm-libc-types/nl_catd.h
@@ -0,0 +1,14 @@
+//===-- Definition of nl_catd type ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_NL_CATD_H
+#define LLVM_LIBC_TYPES_NL_CATD_H
+
+typedef void *nl_catd;
+
+#endif // LLVM_LIBC_TYPES_NL_CATD_H
diff --git a/libc/include/nl_types.yaml b/libc/include/nl_types.yaml
new file mode 100644
index 0000000..aecbb44
--- /dev/null
+++ b/libc/include/nl_types.yaml
@@ -0,0 +1,31 @@
+header: nl_types.h
+standards:
+ - posix
+macros: []
+types:
+ - type_name: nl_catd
+enums: []
+objects: []
+functions:
+ - name: catopen
+ standards:
+ - posix
+ return_type: nl_catd
+ arguments:
+ - type: const char *
+ - type: int
+ - name: catclose
+ standards:
+ - posix
+ return_type: int
+ arguments:
+ - type: nl_catd
+ - name: catgets
+ standards:
+ - posix
+ return_type: char *
+ arguments:
+ - type: nl_catd
+ - type: int
+ - type: int
+ - type: const char*
diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index d7a1e1f..b2afe0a 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -37,6 +37,7 @@ add_subdirectory(arpa)
add_subdirectory(assert)
add_subdirectory(compiler)
add_subdirectory(locale)
+add_subdirectory(nl_types)
add_subdirectory(search)
add_subdirectory(setjmp)
add_subdirectory(signal)
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
index 9affced..3913f7a 100644
--- a/libc/src/__support/FPUtil/double_double.h
+++ b/libc/src/__support/FPUtil/double_double.h
@@ -144,8 +144,9 @@ LIBC_INLINE NumberPair<T> exact_mult(T a, T b) {
return r;
}
-LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
- DoubleDouble r = exact_mult(a, b.hi);
+template <typename T = double>
+LIBC_INLINE NumberPair<T> quick_mult(T a, const NumberPair<T> &b) {
+ NumberPair<T> r = exact_mult(a, b.hi);
r.lo = multiply_add(a, b.lo, r.lo);
return r;
}
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 47bb328..6209000 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -926,6 +926,7 @@ add_header_library(
sincosf_utils
HDRS
sincosf_utils.h
+ sincosf_float_eval.h
DEPENDS
.range_reduction
libc.src.__support.FPUtil.fp_bits
diff --git a/libc/src/__support/math/cosf.h b/libc/src/__support/math/cosf.h
index 074be0b..48ba71a 100644
--- a/libc/src/__support/math/cosf.h
+++ b/libc/src/__support/math/cosf.h
@@ -9,7 +9,6 @@
#ifndef LIBC_SRC___SUPPORT_MATH_COSF_H
#define LIBC_SRC___SUPPORT_MATH_COSF_H
-#include "sincosf_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/except_value_utils.h"
@@ -18,6 +17,26 @@
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
+ defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT) && \
+ defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+
+#include "sincosf_float_eval.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace math {
+
+LIBC_INLINE static constexpr float cosf(float x) {
+ return sincosf_float_eval::sincosf_eval</*IS_SIN*/ false>(x);
+}
+
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#else // !LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT
+
+#include "sincosf_utils.h"
+
namespace LIBC_NAMESPACE_DECL {
namespace math {
@@ -51,7 +70,6 @@ LIBC_INLINE static constexpr float cosf(float x) {
xbits.set_sign(Sign::POS);
uint32_t x_abs = xbits.uintval();
- double xd = static_cast<double>(xbits.get_val());
// Range reduction:
// For |x| > pi/16, we perform range reduction as follows:
@@ -90,6 +108,7 @@ LIBC_INLINE static constexpr float cosf(float x) {
// computed using degree-7 and degree-6 minimax polynomials generated by
// Sollya respectively.
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// |x| < 0x1.0p-12f
if (LIBC_UNLIKELY(x_abs < 0x3980'0000U)) {
// When |x| < 2^-12, the relative error of the approximation cos(x) ~ 1
@@ -108,12 +127,12 @@ LIBC_INLINE static constexpr float cosf(float x) {
// emulated version of FMA.
#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
return fputil::multiply_add(xbits.get_val(), -0x1.0p-25f, 1.0f);
-#else
+#else // !LIBC_TARGET_CPU_HAS_FMA_FLOAT
+ double xd = static_cast<double>(xbits.get_val());
return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, 1.0));
#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
}
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
if (auto r = COSF_EXCEPTS.lookup(x_abs); LIBC_UNLIKELY(r.has_value()))
return r.value();
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
@@ -132,6 +151,7 @@ LIBC_INLINE static constexpr float cosf(float x) {
return x + FPBits::quiet_nan().get_val();
}
+ double xd = static_cast<double>(xbits.get_val());
// Combine the results with the sine of sum formula:
// cos(x) = cos((k + y)*pi/32)
// = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
@@ -150,3 +170,5 @@ LIBC_INLINE static constexpr float cosf(float x) {
} // namespace LIBC_NAMESPACE_DECL
#endif // LIBC_SRC___SUPPORT_MATH_COSF_H
+
+#endif // LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT
diff --git a/libc/src/__support/math/sincosf_float_eval.h b/libc/src/__support/math/sincosf_float_eval.h
new file mode 100644
index 0000000..836e928
--- /dev/null
+++ b/libc/src/__support/math/sincosf_float_eval.h
@@ -0,0 +1,223 @@
+//===-- Compute sin + cos for small angles ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_FLOAT_EVAL_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_FLOAT_EVAL_H
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace sincosf_float_eval {
+
+// Since the worst case of `x mod pi` in single precision is > 2^-28, in order
+// to be bounded by 1 ULP, the range reduction accuracy will need to be at
+// least 2^(-28 - 23) = 2^-51.
+// For fast small range reduction, we will compute as follow:
+// Let pi ~ c0 + c1 + c2
+// with |c1| < ulp(c0)/2 and |c2| < ulp(c1)/2
+// then:
+// k := nearest_int(x * 1/pi);
+// u = (x - k * c0) - k * c1 - k * c2
+// We require k * c0, k * c1 to be exactly representable in single precision.
+// Let p_k be the precision of k, then the precision of c0 and c1 are:
+// 24 - p_k,
+// and the ulp of (k * c2) is 2^(-3 * (24 - p_k)).
+// This gives us the following bound on the precision of k:
+// 3 * (24 - p_k) >= 51,
+// or equivalently:
+// p_k <= 7.
+// We set the bound for p_k to be 6 so that we can have some more wiggle room
+// for computations.
+LIBC_INLINE static unsigned sincosf_range_reduction_small(float x, float &u) {
+ // > display=hexadecimal;
+ // > a = round(pi/8, 18, RN);
+ // > b = round(pi/8 - a, 18, RN);
+ // > c = round(pi/8 - a - b, SG, RN);
+ // > round(8/pi, SG, RN);
+ constexpr float MPI[3] = {-0x1.921f8p-2f, -0x1.aa22p-21f, -0x1.68c234p-41f};
+ constexpr float ONE_OVER_PI = 0x1.45f306p+1f;
+ float prod_hi = x * ONE_OVER_PI;
+ float k = fputil::nearest_integer(prod_hi);
+
+ float y_hi = fputil::multiply_add(k, MPI[0], x); // Exact
+ u = fputil::multiply_add(k, MPI[1], y_hi);
+ u = fputil::multiply_add(k, MPI[2], u);
+ return static_cast<unsigned>(static_cast<int>(k));
+}
+
+// TODO: Add non-FMA version of large range reduction.
+LIBC_INLINE static unsigned sincosf_range_reduction_large(float x, float &u) {
+ // > for i from 0 to 13 do {
+ // if i < 2 then { pi_inv = 0.25 + 2^(8*(i - 2)) / pi; }
+ // else { pi_inv = 2^(8*(i-2)) / pi; };
+ // pn = nearestint(pi_inv);
+ // pi_frac = pi_inv - pn;
+ // a = round(pi_frac, SG, RN);
+ // b = round(pi_frac - a, SG, RN);
+ // c = round(pi_frac - a - b, SG, RN);
+ // d = round(pi_frac - a - b - c, SG, RN);
+ // print("{", 2^3 * a, ",", 2^3 * b, ",", 2^3 * c, ",", 2^3 * d, "},");
+ // };
+ constexpr float EIGHT_OVER_PI[14][4] = {
+ {0x1.000146p1f, -0x1.9f246cp-28f, -0x1.bbead6p-54f, -0x1.ec5418p-85f},
+ {0x1.0145f4p1f, -0x1.f246c6p-24f, -0x1.df56bp-49f, -0x1.ec5418p-77f},
+ {0x1.45f306p1f, 0x1.b9391p-24f, 0x1.529fc2p-50f, 0x1.d5f47ep-76f},
+ {0x1.f306dcp1f, 0x1.391054p-24f, 0x1.4fe13ap-49f, 0x1.7d1f54p-74f},
+ {-0x1.f246c6p0f, -0x1.df56bp-25f, -0x1.ec5418p-53f, 0x1.f534dep-78f},
+ {-0x1.236378p1f, 0x1.529fc2p-26f, 0x1.d5f47ep-52f, -0x1.65912p-77f},
+ {0x1.391054p0f, 0x1.4fe13ap-25f, 0x1.7d1f54p-50f, -0x1.6447e4p-75f},
+ {0x1.1054a8p0f, -0x1.ec5418p-29f, 0x1.f534dep-54f, -0x1.f924ecp-81f},
+ {0x1.529fc2p-2f, 0x1.d5f47ep-28f, -0x1.65912p-53f, 0x1.b6c52cp-79f},
+ {-0x1.ac07b2p1f, 0x1.5f47d4p-24f, 0x1.a6ee06p-49f, 0x1.b6295ap-74f},
+ {-0x1.ec5418p-5f, 0x1.f534dep-30f, -0x1.f924ecp-57f, 0x1.5993c4p-82f},
+ {0x1.3abe9p-1f, -0x1.596448p-27f, 0x1.b6c52cp-55f, -0x1.9b0ef2p-80f},
+ {-0x1.505c16p1f, 0x1.a6ee06p-25f, 0x1.b6295ap-50f, -0x1.b0ef1cp-76f},
+ {-0x1.70565ap-1f, 0x1.dc0db6p-26f, 0x1.4acc9ep-53f, 0x1.0e4108p-80f},
+ };
+
+ using FPBits = typename fputil::FPBits<float>;
+ using fputil::FloatFloat;
+ FPBits xbits(x);
+
+ int x_e_m32 = xbits.get_biased_exponent() - (FPBits::EXP_BIAS + 32);
+ unsigned idx = static_cast<unsigned>((x_e_m32 >> 3) + 2);
+  // Scale x down by 2^(-8 * (idx - 2))
+ xbits.set_biased_exponent((x_e_m32 & 7) + FPBits::EXP_BIAS + 32);
+ // 2^32 <= |x_reduced| < 2^(32 + 8) = 2^40
+ float x_reduced = xbits.get_val();
+ // x * c_hi = ph.hi + ph.lo exactly.
+ FloatFloat ph = fputil::exact_mult<float>(x_reduced, EIGHT_OVER_PI[idx][0]);
+ // x * c_mid = pm.hi + pm.lo exactly.
+ FloatFloat pm = fputil::exact_mult<float>(x_reduced, EIGHT_OVER_PI[idx][1]);
+ // x * c_lo = pl.hi + pl.lo exactly.
+ FloatFloat pl = fputil::exact_mult<float>(x_reduced, EIGHT_OVER_PI[idx][2]);
+ // Extract integral parts and fractional parts of (ph.lo + pm.hi).
+ float sum_hi = ph.lo + pm.hi;
+ float k = fputil::nearest_integer(sum_hi);
+
+ // x * 8/pi mod 1 ~ y_hi + y_mid + y_lo
+ float y_hi = (ph.lo - k) + pm.hi; // Exact
+ FloatFloat y_mid = fputil::exact_add(pm.lo, pl.hi);
+ float y_lo = pl.lo;
+
+ // y_l = x * c_lo_2 + pl.lo
+ float y_l = fputil::multiply_add(x_reduced, EIGHT_OVER_PI[idx][3], y_lo);
+ FloatFloat y = fputil::exact_add(y_hi, y_mid.hi);
+ y.lo += (y_mid.lo + y_l);
+
+ // Digits of pi/8, generated by Sollya with:
+ // > a = round(pi/8, SG, RN);
+  // > b = round(pi/8 - a, SG, RN);
+ constexpr FloatFloat PI_OVER_8 = {-0x1.777a5cp-27f, 0x1.921fb6p-2f};
+
+ // Error bound: with {a} denote the fractional part of a, i.e.:
+ // {a} = a - round(a)
+ // Then,
+ // | {x * 8/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-47
+ // | {x mod pi/8} - (u.hi + u.lo) | < 2 * 2^-5 * 2^-47 = 2^-51
+ u = fputil::multiply_add(y.hi, PI_OVER_8.hi, y.lo * PI_OVER_8.hi);
+
+ return static_cast<unsigned>(static_cast<int>(k));
+}
+
+template <bool IS_SIN> LIBC_INLINE static float sincosf_eval(float x) {
+ // sin(k * pi/8) for k = 0..15, generated by Sollya with:
+ // > for k from 0 to 16 do {
+ // print(round(sin(k * pi/8), SG, RN));
+ // };
+ constexpr float SIN_K_PI_OVER_8[16] = {
+ 0.0f, 0x1.87de2ap-2f, 0x1.6a09e6p-1f, 0x1.d906bcp-1f,
+ 1.0f, 0x1.d906bcp-1f, 0x1.6a09e6p-1f, 0x1.87de2ap-2f,
+ 0.0f, -0x1.87de2ap-2f, -0x1.6a09e6p-1f, -0x1.d906bcp-1f,
+ -1.0f, -0x1.d906bcp-1f, -0x1.6a09e6p-1f, -0x1.87de2ap-2f,
+ };
+
+ using FPBits = fputil::FPBits<float>;
+ FPBits xbits(x);
+ uint32_t x_abs = cpp::bit_cast<uint32_t>(x) & 0x7fff'ffffU;
+
+ float y;
+ unsigned k = 0;
+ if (x_abs < 0x4880'0000U) {
+ k = sincosf_range_reduction_small(x, y);
+ } else {
+
+ if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ if (x_abs == 0x7f80'0000U) {
+ fputil::set_errno_if_required(EDOM);
+ fputil::raise_except_if_required(FE_INVALID);
+ }
+ return x + FPBits::quiet_nan().get_val();
+ }
+
+ k = sincosf_range_reduction_large(x, y);
+ }
+
+ float sin_k = SIN_K_PI_OVER_8[k & 15];
+ // cos(k * pi/8) = sin(k * pi/8 + pi/2) = sin((k + 4) * pi/8).
+ // cos_k = cos(k * pi/8)
+ float cos_k = SIN_K_PI_OVER_8[(k + 4) & 15];
+
+ float y_sq = y * y;
+
+ // Polynomial approximation of sin(y) and cos(y) for |y| <= pi/16:
+ //
+ // Using Taylor polynomial for sin(y):
+ // sin(y) ~ y - y^3 / 6 + y^5 / 120
+ // Using minimax polynomial generated by Sollya for cos(y) with:
+ // > Q = fpminimax(cos(x), [|0, 2, 4|], [|1, SG...|], [0, pi/16]);
+ //
+ // Error bounds:
+ // * For sin(y)
+ // > P = x - SG(1/6)*x^3 + SG(1/120) * x^5;
+ // > dirtyinfnorm((sin(x) - P)/sin(x), [-pi/16, pi/16]);
+ // 0x1.825...p-27
+ // * For cos(y)
+ // > Q = fpminimax(cos(x), [|0, 2, 4|], [|1, SG...|], [0, pi/16]);
+  //   > dirtyinfnorm((cos(x) - Q)/cos(x), [-pi/16, pi/16]);
+ // 0x1.aa8...p-29
+
+ // p1 = y^2 * 1/120 - 1/6
+ float p1 = fputil::multiply_add(y_sq, 0x1.111112p-7f, -0x1.555556p-3f);
+ // q1 = y^2 * coeff(Q, 4) + coeff(Q, 2)
+ float q1 = fputil::multiply_add(y_sq, 0x1.54b8bep-5f, -0x1.ffffc4p-2f);
+ float y3 = y_sq * y;
+ // c1 ~ cos(y)
+ float c1 = fputil::multiply_add(y_sq, q1, 1.0f);
+ // s1 ~ sin(y)
+ float s1 = fputil::multiply_add(y3, p1, y);
+
+ if constexpr (IS_SIN) {
+ // sin(x) = cos(k * pi/8) * sin(y) + sin(k * pi/8) * cos(y).
+ return fputil::multiply_add(cos_k, s1, sin_k * c1);
+ } else {
+ // cos(x) = cos(k * pi/8) * cos(y) - sin(k * pi/8) * sin(y).
+ return fputil::multiply_add(cos_k, c1, -sin_k * s1);
+ }
+}
+
+} // namespace sincosf_float_eval
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_FLOAT_EVAL_H
diff --git a/libc/src/arpa/inet/CMakeLists.txt b/libc/src/arpa/inet/CMakeLists.txt
index 1f39a07..bb43e24 100644
--- a/libc/src/arpa/inet/CMakeLists.txt
+++ b/libc/src/arpa/inet/CMakeLists.txt
@@ -23,6 +23,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ inet_aton
+ SRCS
+ inet_aton.cpp
+ HDRS
+ inet_aton.h
+ DEPENDS
+ libc.include.arpa_inet
+ libc.include.llvm-libc-types.in_addr
+ libc.src.__support.common
+ libc.src.__support.str_to_integer
+)
+
+add_entrypoint_object(
ntohl
SRCS
ntohl.cpp
diff --git a/libc/src/arpa/inet/inet_aton.cpp b/libc/src/arpa/inet/inet_aton.cpp
new file mode 100644
index 0000000..71419cb
--- /dev/null
+++ b/libc/src/arpa/inet/inet_aton.cpp
@@ -0,0 +1,57 @@
+//===-- Implementation of inet_aton function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/arpa/inet/inet_aton.h"
+#include "src/__support/common.h"
+#include "src/__support/endian_internal.h"
+#include "src/__support/str_to_integer.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, inet_aton, (const char *cp, in_addr *inp)) {
+ constexpr int IPV4_MAX_DOT_NUM = 3;
+ unsigned long parts[IPV4_MAX_DOT_NUM + 1] = {0};
+ int dot_num = 0;
+
+ for (; dot_num <= IPV4_MAX_DOT_NUM; ++dot_num) {
+ auto result = internal::strtointeger<unsigned long>(cp, 0);
+ parts[dot_num] = result;
+
+ if (result.has_error() || result.parsed_len == 0)
+ return 0;
+ char next_char = *(cp + result.parsed_len);
+ if (next_char != '.' && next_char != '\0')
+ return 0;
+ else if (next_char == '\0')
+ break;
+ else
+ cp += (result.parsed_len + 1);
+ }
+
+ if (dot_num > IPV4_MAX_DOT_NUM)
+ return 0;
+
+  // Convert the Internet host address cp from the IPv4 numbers-and-dots
+  // notation (a[.b[.c[.d]]]) into binary form (in network byte order).
+ unsigned long result = 0;
+ for (int i = 0; i <= dot_num; ++i) {
+ unsigned long max_part =
+ i == dot_num ? (0xffffffffUL >> (8 * dot_num)) : 0xffUL;
+ if (parts[i] > max_part)
+ return 0;
+ int shift = i == dot_num ? 0 : 8 * (IPV4_MAX_DOT_NUM - i);
+ result |= parts[i] << shift;
+ }
+
+ if (inp)
+ inp->s_addr = Endian::to_big_endian(static_cast<uint32_t>(result));
+
+ return 1;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/arpa/inet/inet_aton.h b/libc/src/arpa/inet/inet_aton.h
new file mode 100644
index 0000000..ea387d1
--- /dev/null
+++ b/libc/src/arpa/inet/inet_aton.h
@@ -0,0 +1,21 @@
+//===-- Implementation header of inet_aton ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
+#define LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
+
+#include "include/llvm-libc-types/in_addr.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int inet_aton(const char *cp, in_addr *inp);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp
index a8e634c..c362628 100644
--- a/libc/src/math/generic/sinf.cpp
+++ b/libc/src/math/generic/sinf.cpp
@@ -17,13 +17,30 @@
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
+ defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT) && \
+ defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+
+#include "src/__support/math/sincosf_float_eval.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(float, sinf, (float x)) {
+ return math::sincosf_float_eval::sincosf_eval</*IS_SIN*/ true>(x);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#else // !LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT
+
#include "src/__support/math/sincosf_utils.h"
-#if defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#include "src/__support/math/range_reduction_fma.h"
-#else
+#else // !LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#include "src/__support/math/range_reduction.h"
-#endif
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
namespace LIBC_NAMESPACE_DECL {
@@ -162,3 +179,4 @@ LLVM_LIBC_FUNCTION(float, sinf, (float x)) {
}
} // namespace LIBC_NAMESPACE_DECL
+#endif // LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT
diff --git a/libc/src/nl_types/CMakeLists.txt b/libc/src/nl_types/CMakeLists.txt
new file mode 100644
index 0000000..9783e0e
--- /dev/null
+++ b/libc/src/nl_types/CMakeLists.txt
@@ -0,0 +1,31 @@
+add_entrypoint_object(
+ catopen
+ SRCS
+ catopen.cpp
+ HDRS
+ catopen.h
+ DEPENDS
+ libc.include.llvm-libc-types.nl_catd
+ libc.src.errno.errno
+)
+
+add_entrypoint_object(
+ catclose
+ SRCS
+ catclose.cpp
+ HDRS
+ catclose.h
+ DEPENDS
+ libc.include.llvm-libc-types.nl_catd
+)
+
+add_entrypoint_object(
+ catgets
+ SRCS
+ catgets.cpp
+ HDRS
+ catgets.h
+ DEPENDS
+ libc.include.llvm-libc-types.nl_catd
+)
+
diff --git a/libc/src/nl_types/catclose.cpp b/libc/src/nl_types/catclose.cpp
new file mode 100644
index 0000000..1f87900d
--- /dev/null
+++ b/libc/src/nl_types/catclose.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of catclose ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/nl_types/catclose.h"
+#include "include/llvm-libc-types/nl_catd.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, catclose, ([[maybe_unused]] nl_catd catalog)) {
+ // TODO: Add implementation for message catalogs. For now, return error
+ // regardless of input.
+ return -1;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/nl_types/catclose.h b/libc/src/nl_types/catclose.h
new file mode 100644
index 0000000..433020a
--- /dev/null
+++ b/libc/src/nl_types/catclose.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for catclose ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_NL_TYPES_CATCLOSE_H
+#define LLVM_LIBC_SRC_NL_TYPES_CATCLOSE_H
+
+#include "include/llvm-libc-types/nl_catd.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int catclose(nl_catd catalog);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_NL_TYPES_CATCLOSE_H
diff --git a/libc/src/nl_types/catgets.cpp b/libc/src/nl_types/catgets.cpp
new file mode 100644
index 0000000..3768977
--- /dev/null
+++ b/libc/src/nl_types/catgets.cpp
@@ -0,0 +1,25 @@
+//===-- Implementation of catgets -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/nl_types/catgets.h"
+#include "include/llvm-libc-types/nl_catd.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(char *, catgets,
+ ([[maybe_unused]] nl_catd catalog,
+ [[maybe_unused]] int set_number,
+ [[maybe_unused]] int message_number, const char *message)) {
+ // TODO: Add implementation for message catalogs. For now, return backup
+ // message regardless of input.
+ return const_cast<char *>(message);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/nl_types/catgets.h b/libc/src/nl_types/catgets.h
new file mode 100644
index 0000000..c909bec
--- /dev/null
+++ b/libc/src/nl_types/catgets.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for catgets -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_NL_TYPES_CATGETS_H
+#define LLVM_LIBC_SRC_NL_TYPES_CATGETS_H
+
+#include "include/llvm-libc-types/nl_catd.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+char *catgets(nl_catd catalog, int set_number, int message_number,
+ const char *message);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_NL_TYPES_CATGETS_H
diff --git a/libc/src/nl_types/catopen.cpp b/libc/src/nl_types/catopen.cpp
new file mode 100644
index 0000000..393d760
--- /dev/null
+++ b/libc/src/nl_types/catopen.cpp
@@ -0,0 +1,26 @@
+//===-- Implementation of catopen -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/nl_types/catopen.h"
+#include "include/llvm-libc-types/nl_catd.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(nl_catd, catopen,
+ ([[maybe_unused]] const char *name,
+ [[maybe_unused]] int flag)) {
+ // TODO: Add implementation for message catalogs. For now, return error
+ // regardless of input.
+ libc_errno = EINVAL;
+ return reinterpret_cast<nl_catd>(-1);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/nl_types/catopen.h b/libc/src/nl_types/catopen.h
new file mode 100644
index 0000000..08ff71a
--- /dev/null
+++ b/libc/src/nl_types/catopen.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for catopen -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_NL_TYPES_CATOPEN_H
+#define LLVM_LIBC_SRC_NL_TYPES_CATOPEN_H
+
+#include "include/llvm-libc-types/nl_catd.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+nl_catd catopen(const char *name, int flag);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_NL_TYPES_CATOPEN_H
diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt
index c576e08..0c6ec9f 100644
--- a/libc/test/src/CMakeLists.txt
+++ b/libc/test/src/CMakeLists.txt
@@ -96,6 +96,7 @@ add_subdirectory(assert)
add_subdirectory(compiler)
add_subdirectory(dirent)
add_subdirectory(locale)
+add_subdirectory(nl_types)
add_subdirectory(signal)
add_subdirectory(spawn)
diff --git a/libc/test/src/arpa/inet/CMakeLists.txt b/libc/test/src/arpa/inet/CMakeLists.txt
index 21760df..690f751 100644
--- a/libc/test/src/arpa/inet/CMakeLists.txt
+++ b/libc/test/src/arpa/inet/CMakeLists.txt
@@ -23,6 +23,17 @@ add_libc_unittest(
)
add_libc_unittest(
+ inet_aton
+ SUITE
+ libc_arpa_inet_unittests
+ SRCS
+ inet_aton_test.cpp
+ DEPENDS
+ libc.src.arpa.inet.htonl
+ libc.src.arpa.inet.inet_aton
+)
+
+add_libc_unittest(
ntohl
SUITE
libc_arpa_inet_unittests
diff --git a/libc/test/src/arpa/inet/inet_aton_test.cpp b/libc/test/src/arpa/inet/inet_aton_test.cpp
new file mode 100644
index 0000000..c9c9787
--- /dev/null
+++ b/libc/test/src/arpa/inet/inet_aton_test.cpp
@@ -0,0 +1,92 @@
+//===-- Unittests for inet_aton -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/arpa/inet/htonl.h"
+#include "src/arpa/inet/inet_aton.h"
+#include "test/UnitTest/Test.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+TEST(LlvmLibcInetAton, ValidTest) {
+ in_addr a;
+
+ // a.b.c.d
+ a.s_addr = 0;
+ ASSERT_EQ(1, inet_aton("127.1.2.4", &a));
+ ASSERT_EQ(htonl(0x7f010204), a.s_addr);
+
+ // a.b.c
+ a.s_addr = 0;
+ ASSERT_EQ(1, inet_aton("127.1.4", &a));
+ ASSERT_EQ(htonl(0x7f010004), a.s_addr);
+
+ // a.b
+ a.s_addr = 0;
+ ASSERT_EQ(1, inet_aton("127.1", &a));
+ ASSERT_EQ(htonl(0x7f000001), a.s_addr);
+
+ // a
+ a.s_addr = 0;
+ ASSERT_EQ(1, inet_aton("0x7f000001", &a));
+ ASSERT_EQ(htonl(0x7f000001), a.s_addr);
+
+ // Hex (0x) and mixed-case hex digits.
+ a.s_addr = 0;
+ ASSERT_EQ(1, inet_aton("0xFf.0.0.1", &a));
+ ASSERT_EQ(htonl(0xff000001), a.s_addr);
+
+ // Hex (0X) and mixed-case hex digits.
+ a.s_addr = 0;
+ ASSERT_EQ(1, inet_aton("0XfF.0.0.1", &a));
+ ASSERT_EQ(htonl(0xff000001), a.s_addr);
+
+ // Octal.
+ a.s_addr = 0;
+ ASSERT_EQ(1, inet_aton("0177.0.0.1", &a));
+ ASSERT_EQ(htonl(0x7f000001), a.s_addr);
+
+ a.s_addr = 0;
+ ASSERT_EQ(1, inet_aton("036", &a));
+ ASSERT_EQ(htonl(036U), a.s_addr);
+}
+
+TEST(LlvmLibcInetAton, InvalidTest) {
+ ASSERT_EQ(0, inet_aton("", nullptr)); // Empty.
+ ASSERT_EQ(0, inet_aton("x", nullptr)); // Leading junk.
+ ASSERT_EQ(0, inet_aton("127.0.0.1x", nullptr)); // Trailing junk.
+ ASSERT_EQ(0, inet_aton("09.0.0.1", nullptr)); // Invalid octal.
+ ASSERT_EQ(0, inet_aton("0xg.0.0.1", nullptr)); // Invalid hex.
+ ASSERT_EQ(0, inet_aton("1.2.3.4.5", nullptr)); // Too many dots.
+ ASSERT_EQ(0, inet_aton("1.2.3.4.", nullptr)); // Trailing dot.
+
+ // Out of range a.b.c.d form.
+ ASSERT_EQ(0, inet_aton("999.0.0.1", nullptr));
+ ASSERT_EQ(0, inet_aton("0.999.0.1", nullptr));
+ ASSERT_EQ(0, inet_aton("0.0.999.1", nullptr));
+ ASSERT_EQ(0, inet_aton("0.0.0.999", nullptr));
+
+ // Out of range a.b.c form.
+ ASSERT_EQ(0, inet_aton("256.0.0", nullptr));
+ ASSERT_EQ(0, inet_aton("0.256.0", nullptr));
+ ASSERT_EQ(0, inet_aton("0.0.0x10000", nullptr));
+
+ // Out of range a.b form.
+ ASSERT_EQ(0, inet_aton("256.0", nullptr));
+ ASSERT_EQ(0, inet_aton("0.0x1000000", nullptr));
+
+ // Out of range a form.
+ ASSERT_EQ(0, inet_aton("0x100000000", nullptr));
+
+ // 64-bit overflow.
+ ASSERT_EQ(0, inet_aton("0x10000000000000000", nullptr));
+
+ // Out of range octal.
+ ASSERT_EQ(0, inet_aton("0400.0.0.1", nullptr));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index b3f54ab..ff5c511 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -17,6 +17,20 @@ add_fp_unittest(
)
add_fp_unittest(
+ cosf_float_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ cosf_float_test.cpp
+ DEPENDS
+ libc.src.__support.math.sincosf_utils
+ libc.src.__support.FPUtil.fp_bits
+ FLAGS
+ FMA_OPT__ONLY
+)
+
+add_fp_unittest(
cos_test
NEED_MPFR
SUITE
@@ -97,6 +111,20 @@ add_fp_unittest(
)
add_fp_unittest(
+ sinf_float_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ sinf_float_test.cpp
+ DEPENDS
+ libc.src.__support.math.sincosf_utils
+ libc.src.__support.FPUtil.fp_bits
+ FLAGS
+ FMA_OPT__ONLY
+)
+
+add_fp_unittest(
sinf16_test
NEED_MPFR
SUITE
diff --git a/libc/test/src/math/cosf_float_test.cpp b/libc/test/src/math/cosf_float_test.cpp
new file mode 100644
index 0000000..3d573b2
--- /dev/null
+++ b/libc/test/src/math/cosf_float_test.cpp
@@ -0,0 +1,35 @@
+//===-- Unittests for cosf float-only -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/math/sincosf_float_eval.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+#include "hdr/stdint_proxy.h"
+
+using LlvmLibcCosfFloatTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+float cosf_fast(float x) {
+ return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval<
+ /*IS_SIN*/ false>(x);
+}
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+TEST_F(LlvmLibcCosfFloatTest, InFloatRange) {
+ constexpr uint32_t COUNT = 100'000;
+ constexpr uint32_t STEP = UINT32_MAX / COUNT;
+ for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+ float x = FPBits(v).get_val();
+ if (FPBits(v).is_nan() || FPBits(v).is_inf())
+ continue;
+ ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, cosf_fast(x), 3.5);
+ }
+}
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 1583ab6..2ff4f02 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -58,6 +58,21 @@ add_fp_unittest(
)
add_fp_unittest(
+ sinf_float_test
+ NO_RUN_POSTBUILD
+ NEED_MPFR
+ SUITE
+ libc_math_exhaustive_tests
+ SRCS
+ sinf_float_test.cpp
+ LINK_LIBRARIES
+ -lpthread
+ DEPENDS
+ .exhaustive_test
+ libc.src.__support.math.sincosf_utils
+)
+
+add_fp_unittest(
sinpif_test
NO_RUN_POSTBUILD
NEED_MPFR
@@ -90,6 +105,21 @@ add_fp_unittest(
)
add_fp_unittest(
+ cosf_float_test
+ NO_RUN_POSTBUILD
+ NEED_MPFR
+ SUITE
+ libc_math_exhaustive_tests
+ SRCS
+ cosf_float_test.cpp
+ LINK_LIBRARIES
+ -lpthread
+ DEPENDS
+ .exhaustive_test
+ libc.src.__support.math.sincosf_utils
+)
+
+add_fp_unittest(
cospif_test
NO_RUN_POSTBUILD
NEED_MPFR
diff --git a/libc/test/src/math/exhaustive/cosf_float_test.cpp b/libc/test/src/math/exhaustive/cosf_float_test.cpp
new file mode 100644
index 0000000..0c3a988
--- /dev/null
+++ b/libc/test/src/math/exhaustive/cosf_float_test.cpp
@@ -0,0 +1,44 @@
+//===-- Exhaustive test for cosf - float-only -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/math/sincosf_float_eval.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+float cosf_fast(float x) {
+ return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval<
+ /*IS_SIN*/ false>(x);
+}
+
+using LlvmLibcCosfExhaustiveTest =
+ LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Cos, cosf_fast,
+ 3>;
+
+// Range: [0, Inf];
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcCosfExhaustiveTest, PositiveRange) {
+ std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+ << POS_START << ", 0x" << POS_STOP << ") --" << std::dec
+ << std::endl;
+ test_full_range(mpfr::RoundingMode::Nearest, POS_START, POS_STOP);
+}
+
+// Range: [-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcCosfExhaustiveTest, NegativeRange) {
+ std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+ << NEG_START << ", 0x" << NEG_STOP << ") --" << std::dec
+ << std::endl;
+ test_full_range(mpfr::RoundingMode::Nearest, NEG_START, NEG_STOP);
+}
diff --git a/libc/test/src/math/exhaustive/exhaustive_test.h b/libc/test/src/math/exhaustive/exhaustive_test.h
index 8be65ba..322d774 100644
--- a/libc/test/src/math/exhaustive/exhaustive_test.h
+++ b/libc/test/src/math/exhaustive/exhaustive_test.h
@@ -40,7 +40,7 @@ template <typename OutType, typename InType = OutType>
using UnaryOp = OutType(InType);
template <typename OutType, typename InType, mpfr::Operation Op,
- UnaryOp<OutType, InType> Func>
+ UnaryOp<OutType, InType> Func, unsigned Tolerance = 0>
struct UnaryOpChecker : public virtual LIBC_NAMESPACE::testing::Test {
using FloatType = InType;
using FPBits = LIBC_NAMESPACE::fputil::FPBits<FloatType>;
@@ -57,8 +57,8 @@ struct UnaryOpChecker : public virtual LIBC_NAMESPACE::testing::Test {
do {
FPBits xbits(bits);
FloatType x = xbits.get_val();
- bool correct =
- TEST_MPFR_MATCH_ROUNDING_SILENTLY(Op, x, Func(x), 0.5, rounding);
+ bool correct = TEST_MPFR_MATCH_ROUNDING_SILENTLY(
+ Op, x, Func(x), static_cast<double>(Tolerance) + 0.5, rounding);
failed += (!correct);
// Uncomment to print out failed values.
if (!correct) {
@@ -256,9 +256,10 @@ struct LlvmLibcExhaustiveMathTest
}
};
-template <typename FloatType, mpfr::Operation Op, UnaryOp<FloatType> Func>
-using LlvmLibcUnaryOpExhaustiveMathTest =
- LlvmLibcExhaustiveMathTest<UnaryOpChecker<FloatType, FloatType, Op, Func>>;
+template <typename FloatType, mpfr::Operation Op, UnaryOp<FloatType> Func,
+ unsigned Tolerance = 0>
+using LlvmLibcUnaryOpExhaustiveMathTest = LlvmLibcExhaustiveMathTest<
+ UnaryOpChecker<FloatType, FloatType, Op, Func, Tolerance>>;
template <typename OutType, typename InType, mpfr::Operation Op,
UnaryOp<OutType, InType> Func>
diff --git a/libc/test/src/math/exhaustive/sinf_float_test.cpp b/libc/test/src/math/exhaustive/sinf_float_test.cpp
new file mode 100644
index 0000000..1e735e6
--- /dev/null
+++ b/libc/test/src/math/exhaustive/sinf_float_test.cpp
@@ -0,0 +1,47 @@
+//===-- Exhaustive test for sinf - float-only -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Test float-only fast math implementation for sinf.
+#define LIBC_MATH (LIBC_MATH_FAST | LIBC_MATH_INTERMEDIATE_COMP_IN_FLOAT)
+
+#include "exhaustive_test.h"
+#include "src/__support/math/sincosf_float_eval.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+float sinf_fast(float x) {
+ return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval<
+ /*IS_SIN*/ true>(x);
+}
+
+using LlvmLibcSinfExhaustiveTest =
+ LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Sin, sinf_fast,
+ 3>;
+
+// Range: [0, Inf];
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PositiveRange) {
+ std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+ << POS_START << ", 0x" << POS_STOP << ") --" << std::dec
+ << std::endl;
+ test_full_range(mpfr::RoundingMode::Nearest, POS_START, POS_STOP);
+}
+
+// Range: [-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRange) {
+ std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+ << NEG_START << ", 0x" << NEG_STOP << ") --" << std::dec
+ << std::endl;
+ test_full_range(mpfr::RoundingMode::Nearest, NEG_START, NEG_STOP);
+}
diff --git a/libc/test/src/math/sinf_float_test.cpp b/libc/test/src/math/sinf_float_test.cpp
new file mode 100644
index 0000000..33aab96
--- /dev/null
+++ b/libc/test/src/math/sinf_float_test.cpp
@@ -0,0 +1,35 @@
+//===-- Unittests for sinf float-only -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/math/sincosf_float_eval.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+#include "hdr/stdint_proxy.h"
+
+using LlvmLibcSinfFloatTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+float sinf_fast(float x) {
+ return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval<
+ /*IS_SIN*/ true>(x);
+}
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+TEST_F(LlvmLibcSinfFloatTest, InFloatRange) {
+ constexpr uint32_t COUNT = 100'000;
+ constexpr uint32_t STEP = UINT32_MAX / COUNT;
+ for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+ float x = FPBits(v).get_val();
+ if (FPBits(v).is_nan() || FPBits(v).is_inf())
+ continue;
+ ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, sinf_fast(x), 3.5);
+ }
+}
diff --git a/libc/test/src/nl_types/CMakeLists.txt b/libc/test/src/nl_types/CMakeLists.txt
new file mode 100644
index 0000000..4fce637
--- /dev/null
+++ b/libc/test/src/nl_types/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_custom_target(libc-nl-types-tests)
+
+add_libc_test(
+ nl_types_test
+ SUITE
+ libc-nl-types-tests
+ SRCS
+ nl_types_test.cpp
+ DEPENDS
+ libc.include.llvm-libc-types.nl_catd
+ libc.src.nl_types.catopen
+ libc.src.nl_types.catclose
+ libc.src.nl_types.catgets
+)
diff --git a/libc/test/src/nl_types/nl_types_test.cpp b/libc/test/src/nl_types/nl_types_test.cpp
new file mode 100644
index 0000000..5ae5c5a
--- /dev/null
+++ b/libc/test/src/nl_types/nl_types_test.cpp
@@ -0,0 +1,33 @@
+//===-- Unittests for nl_types --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "include/llvm-libc-types/nl_catd.h"
+#include "src/nl_types/catclose.h"
+#include "src/nl_types/catgets.h"
+#include "src/nl_types/catopen.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+
+using LlvmLibcNlTypesTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcNlTypesTest, CatopenFails) {
+ ASSERT_EQ(LIBC_NAMESPACE::catopen("/somepath", 0),
+ reinterpret_cast<nl_catd>(-1));
+ ASSERT_ERRNO_EQ(EINVAL);
+}
+
+TEST_F(LlvmLibcNlTypesTest, CatcloseFails) {
+ ASSERT_EQ(LIBC_NAMESPACE::catclose(nullptr), -1);
+}
+
+TEST_F(LlvmLibcNlTypesTest, CatgetsFails) {
+ const char *message = "message";
+ // Note that we test for pointer equality here, since catgets
+ // is expected to return the input argument as-is.
+ ASSERT_EQ(LIBC_NAMESPACE::catgets(nullptr, 0, 0, message),
+ const_cast<char *>(message));
+}
diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
index 5d71d2cf..1ab4c43 100644
--- a/libunwind/src/UnwindRegistersRestore.S
+++ b/libunwind/src/UnwindRegistersRestore.S
@@ -25,6 +25,8 @@
#if !defined(__USING_SJLJ_EXCEPTIONS__)
#if defined(__i386__)
+.att_syntax
+
DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto)
#
# extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *);
@@ -69,6 +71,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto)
# skip gs
#elif defined(__x86_64__) && !defined(__arm64ec__)
+.att_syntax
DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto)
#
diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
index fe3ba78..31a177f 100644
--- a/libunwind/src/UnwindRegistersSave.S
+++ b/libunwind/src/UnwindRegistersSave.S
@@ -25,6 +25,7 @@
#if !defined(__USING_SJLJ_EXCEPTIONS__)
#if defined(__i386__)
+.att_syntax
#
# extern int __unw_getcontext(unw_context_t* thread_state)
@@ -109,6 +110,7 @@ DEFINE_LIBUNWIND_FUNCTION("#__unw_getcontext")
.text
#elif defined(__x86_64__)
+.att_syntax
#
# extern int __unw_getcontext(unw_context_t* thread_state)
diff --git a/lldb/docs/resources/lldbgdbremote.md b/lldb/docs/resources/lldbgdbremote.md
index 287484e..032edb6 100644
--- a/lldb/docs/resources/lldbgdbremote.md
+++ b/lldb/docs/resources/lldbgdbremote.md
@@ -2491,9 +2491,10 @@ The packet below are supported by the
### qWasmCallStack
Get the Wasm call stack for the given thread id. This returns a hex-encoded
-list of PC values, one for each frame of the call stack. To match the Wasm
-specification, the addresses are encoded in little endian byte order, even if
-the endian of the Wasm runtime's host is not little endian.
+list (with no delimiters) of 64-bit PC values, one for each frame of the call
+stack. To match the Wasm specification, the addresses are encoded in little
+endian byte order, even if the endian of the Wasm runtime's host is not little
+endian.
```
send packet: $qWasmCallStack:202dbe040#08
diff --git a/lldb/include/lldb/Symbol/DeclVendor.h b/lldb/include/lldb/Symbol/DeclVendor.h
index 19ab2bb..5b0cbf9 100644
--- a/lldb/include/lldb/Symbol/DeclVendor.h
+++ b/lldb/include/lldb/Symbol/DeclVendor.h
@@ -20,7 +20,6 @@ namespace lldb_private {
class DeclVendor {
public:
enum DeclVendorKind {
- eClangDeclVendor,
eClangModuleDeclVendor,
eAppleObjCDeclVendor,
eLastClangDeclVendor,
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index dc75d98..8f5892e 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -1571,6 +1571,28 @@ public:
virtual size_t ReadMemory(lldb::addr_t vm_addr, void *buf, size_t size,
Status &error);
+ /// Read from multiple memory ranges and write the results into buffer.
+ /// This calls ReadMemoryFromInferior multiple times, once per range,
+ /// bypassing the read cache. Process implementations that can perform this
+ /// operation more efficiently should override this.
+ ///
+ /// \param[in] ranges
+ /// A collection of ranges (base address + size) to read from.
+ ///
+ /// \param[out] buffer
+ /// A buffer where the read memory will be written to. It must be at least
+ /// as long as the sum of the sizes of each range.
+ ///
+ /// \return
+ /// A vector of MutableArrayRef, where each MutableArrayRef is a slice of
+ /// the input buffer into which the memory contents were copied. The size
+ /// of the slice indicates how many bytes were read successfully. Partial
+ /// reads are always performed from the start of the requested range,
+ /// never from the middle or end.
+ virtual llvm::SmallVector<llvm::MutableArrayRef<uint8_t>>
+ ReadMemoryRanges(llvm::ArrayRef<Range<lldb::addr_t, size_t>> ranges,
+ llvm::MutableArrayRef<uint8_t> buffer);
+
/// Read of memory from a process.
///
/// This function has the same semantics of ReadMemory except that it
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt
index 2aae7d1..01d588f 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt
+++ b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt
@@ -5,7 +5,6 @@ add_lldb_library(lldbPluginExpressionParserClang
ClangASTImporter.cpp
ClangASTMetadata.cpp
ClangASTSource.cpp
- ClangDeclVendor.cpp
ClangExpressionDeclMap.cpp
ClangExpressionHelper.cpp
ClangExpressionParser.cpp
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
index ebe7be4..0efeb2e 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
@@ -8,7 +8,6 @@
#include "ClangASTSource.h"
-#include "ClangDeclVendor.h"
#include "ClangModulesDeclVendor.h"
#include "lldb/Core/Module.h"
@@ -799,7 +798,7 @@ void ClangASTSource::FindDeclInModules(NameSearchContext &context,
bool append = false;
uint32_t max_matches = 1;
- std::vector<clang::NamedDecl *> decls;
+ std::vector<CompilerDecl> decls;
if (!modules_decl_vendor->FindDecls(name, append, max_matches, decls))
return;
@@ -807,7 +806,8 @@ void ClangASTSource::FindDeclInModules(NameSearchContext &context,
LLDB_LOG(log, " CAS::FEVD Matching entity found for \"{0}\" in the modules",
name);
- clang::NamedDecl *const decl_from_modules = decls[0];
+ auto *const decl_from_modules =
+ llvm::cast<NamedDecl>(ClangUtil::GetDecl(decls[0]));
if (llvm::isa<clang::TypeDecl>(decl_from_modules) ||
llvm::isa<clang::ObjCContainerDecl>(decl_from_modules) ||
@@ -849,16 +849,16 @@ void ClangASTSource::FindDeclInObjCRuntime(NameSearchContext &context,
bool append = false;
uint32_t max_matches = 1;
- std::vector<clang::NamedDecl *> decls;
+ std::vector<CompilerDecl> decls;
- auto *clang_decl_vendor = llvm::cast<ClangDeclVendor>(decl_vendor);
+ auto *clang_decl_vendor = llvm::cast<DeclVendor>(decl_vendor);
if (!clang_decl_vendor->FindDecls(name, append, max_matches, decls))
return;
LLDB_LOG(log, " CAS::FEVD Matching type found for \"{0}\" in the runtime",
name);
- clang::Decl *copied_decl = CopyDecl(decls[0]);
+ clang::Decl *copied_decl = CopyDecl(ClangUtil::GetDecl(decls[0]));
clang::NamedDecl *copied_named_decl =
copied_decl ? dyn_cast<clang::NamedDecl>(copied_decl) : nullptr;
@@ -1081,14 +1081,14 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
ConstString interface_name(interface_decl->getNameAsString().c_str());
bool append = false;
uint32_t max_matches = 1;
- std::vector<clang::NamedDecl *> decls;
+ std::vector<CompilerDecl> decls;
if (!modules_decl_vendor->FindDecls(interface_name, append, max_matches,
decls))
break;
ObjCInterfaceDecl *interface_decl_from_modules =
- dyn_cast<ObjCInterfaceDecl>(decls[0]);
+ dyn_cast<ObjCInterfaceDecl>(ClangUtil::GetDecl(decls[0]));
if (!interface_decl_from_modules)
break;
@@ -1121,15 +1121,15 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
ConstString interface_name(interface_decl->getNameAsString().c_str());
bool append = false;
uint32_t max_matches = 1;
- std::vector<clang::NamedDecl *> decls;
+ std::vector<CompilerDecl> decls;
- auto *clang_decl_vendor = llvm::cast<ClangDeclVendor>(decl_vendor);
+ auto *clang_decl_vendor = llvm::cast<DeclVendor>(decl_vendor);
if (!clang_decl_vendor->FindDecls(interface_name, append, max_matches,
decls))
break;
ObjCInterfaceDecl *runtime_interface_decl =
- dyn_cast<ObjCInterfaceDecl>(decls[0]);
+ dyn_cast<ObjCInterfaceDecl>(ClangUtil::GetDecl(decls[0]));
if (!runtime_interface_decl)
break;
@@ -1254,13 +1254,13 @@ void ClangASTSource::FindObjCPropertyAndIvarDecls(NameSearchContext &context) {
bool append = false;
uint32_t max_matches = 1;
- std::vector<clang::NamedDecl *> decls;
+ std::vector<CompilerDecl> decls;
if (!modules_decl_vendor->FindDecls(class_name, append, max_matches, decls))
break;
DeclFromUser<const ObjCInterfaceDecl> interface_decl_from_modules(
- dyn_cast<ObjCInterfaceDecl>(decls[0]));
+ dyn_cast<ObjCInterfaceDecl>(ClangUtil::GetDecl(decls[0])));
if (!interface_decl_from_modules.IsValid())
break;
@@ -1297,14 +1297,14 @@ void ClangASTSource::FindObjCPropertyAndIvarDecls(NameSearchContext &context) {
bool append = false;
uint32_t max_matches = 1;
- std::vector<clang::NamedDecl *> decls;
+ std::vector<CompilerDecl> decls;
- auto *clang_decl_vendor = llvm::cast<ClangDeclVendor>(decl_vendor);
+ auto *clang_decl_vendor = llvm::cast<DeclVendor>(decl_vendor);
if (!clang_decl_vendor->FindDecls(class_name, append, max_matches, decls))
break;
DeclFromUser<const ObjCInterfaceDecl> interface_decl_from_runtime(
- dyn_cast<ObjCInterfaceDecl>(decls[0]));
+ dyn_cast<ObjCInterfaceDecl>(ClangUtil::GetDecl(decls[0])));
if (!interface_decl_from_runtime.IsValid())
break;
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.cpp
deleted file mode 100644
index 867d4ff..0000000
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- ClangDeclVendor.cpp -----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "Plugins/ExpressionParser/Clang/ClangDeclVendor.h"
-#include "Plugins/ExpressionParser/Clang/ClangUtil.h"
-#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
-
-#include "lldb/Utility/ConstString.h"
-
-using namespace lldb_private;
-
-uint32_t ClangDeclVendor::FindDecls(ConstString name, bool append,
- uint32_t max_matches,
- std::vector<clang::NamedDecl *> &decls) {
- if (!append)
- decls.clear();
-
- std::vector<CompilerDecl> compiler_decls;
- uint32_t ret = FindDecls(name, /*append*/ false, max_matches, compiler_decls);
- for (CompilerDecl compiler_decl : compiler_decls) {
- clang::Decl *d = ClangUtil::GetDecl(compiler_decl);
- clang::NamedDecl *nd = llvm::cast<clang::NamedDecl>(d);
- decls.push_back(nd);
- }
- return ret;
-}
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.h
deleted file mode 100644
index a9b2d41..0000000
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- ClangDeclVendor.h ---------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGDECLVENDOR_H
-#define LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGDECLVENDOR_H
-
-#include "lldb/Symbol/DeclVendor.h"
-
-namespace clang {
-class NamedDecl;
-}
-
-namespace lldb_private {
-
-// A clang specialized extension to DeclVendor.
-class ClangDeclVendor : public DeclVendor {
-public:
- ClangDeclVendor(DeclVendorKind kind) : DeclVendor(kind) {}
-
- ~ClangDeclVendor() override = default;
-
- using DeclVendor::FindDecls;
-
- uint32_t FindDecls(ConstString name, bool append, uint32_t max_matches,
- std::vector<clang::NamedDecl *> &decls);
-
- static bool classof(const DeclVendor *vendor) {
- return vendor->GetKind() >= eClangDeclVendor &&
- vendor->GetKind() < eLastClangDeclVendor;
- }
-
-private:
- ClangDeclVendor(const ClangDeclVendor &) = delete;
- const ClangDeclVendor &operator=(const ClangDeclVendor &) = delete;
-};
-} // namespace lldb_private
-
-#endif
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
index 833bc3b..9cb8f7a 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
@@ -1023,13 +1023,14 @@ void ClangExpressionDeclMap::LookupInModulesDeclVendor(
bool append = false;
uint32_t max_matches = 1;
- std::vector<clang::NamedDecl *> decls;
+ std::vector<CompilerDecl> decls;
if (!modules_decl_vendor->FindDecls(name, append, max_matches, decls))
return;
assert(!decls.empty() && "FindDecls returned true but no decls?");
- clang::NamedDecl *const decl_from_modules = decls[0];
+ auto *const decl_from_modules =
+ llvm::cast<NamedDecl>(ClangUtil::GetDecl(decls[0]));
LLDB_LOG(log,
" CAS::FEVD Matching decl found for "
@@ -1223,7 +1224,7 @@ bool ClangExpressionDeclMap::LookupFunction(
Target *target = m_parser_vars->m_exe_ctx.GetTargetPtr();
- std::vector<clang::NamedDecl *> decls_from_modules;
+ std::vector<CompilerDecl> decls_from_modules;
if (target) {
if (std::shared_ptr<ClangModulesDeclVendor> decl_vendor =
@@ -1314,7 +1315,8 @@ bool ClangExpressionDeclMap::LookupFunction(
}
if (!found_function_with_type_info) {
- for (clang::NamedDecl *decl : decls_from_modules) {
+ for (const CompilerDecl &compiler_decl : decls_from_modules) {
+ clang::Decl *decl = ClangUtil::GetDecl(compiler_decl);
if (llvm::isa<clang::FunctionDecl>(decl)) {
clang::NamedDecl *copied_decl =
llvm::cast_or_null<FunctionDecl>(CopyDecl(decl));
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp
index 67984c5..b77e269 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp
@@ -226,7 +226,7 @@ void StoringDiagnosticConsumer::SetCurrentModuleProgress(
}
ClangModulesDeclVendor::ClangModulesDeclVendor()
- : ClangDeclVendor(eClangModuleDeclVendor) {}
+ : DeclVendor(eClangModuleDeclVendor) {}
ClangModulesDeclVendor::~ClangModulesDeclVendor() = default;
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h
index d820552..ad4d060 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h
@@ -9,17 +9,16 @@
#ifndef LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGMODULESDECLVENDOR_H
#define LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGMODULESDECLVENDOR_H
+#include "lldb/Symbol/DeclVendor.h"
#include "lldb/Symbol/SourceModule.h"
#include "lldb/Target/Platform.h"
-#include "Plugins/ExpressionParser/Clang/ClangDeclVendor.h"
-
#include <set>
#include <vector>
namespace lldb_private {
-class ClangModulesDeclVendor : public ClangDeclVendor {
+class ClangModulesDeclVendor : public DeclVendor {
public:
// Constructors and Destructors
ClangModulesDeclVendor();
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
index 460c503..954f269 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
@@ -281,22 +281,23 @@ ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses,
const size_t num_methods = addresses.size();
llvm::SmallVector<uint8_t, 0> buffer(num_methods * size, 0);
- llvm::DenseSet<uint32_t> failed_indices;
- for (auto [idx, addr] : llvm::enumerate(addresses)) {
- Status error;
- process->ReadMemory(addr, buffer.data() + idx * size, size, error);
- if (error.Fail())
- failed_indices.insert(idx);
- }
+ llvm::SmallVector<Range<addr_t, size_t>> mem_ranges =
+ llvm::to_vector(llvm::map_range(llvm::seq(num_methods), [&](size_t idx) {
+ return Range<addr_t, size_t>(addresses[idx], size);
+ }));
+
+ llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results =
+ process->ReadMemoryRanges(mem_ranges, buffer);
llvm::SmallVector<method_t, 0> methods;
methods.reserve(num_methods);
- for (auto [idx, addr] : llvm::enumerate(addresses)) {
- if (failed_indices.contains(idx))
+ for (auto [addr, memory] : llvm::zip(addresses, read_results)) {
+ // Ignore partial reads.
+ if (memory.size() != size)
continue;
- DataExtractor extractor(buffer.data() + idx * size, size,
- process->GetByteOrder(),
+
+ DataExtractor extractor(memory.data(), size, process->GetByteOrder(),
process->GetAddressByteSize());
methods.push_back(method_t());
methods.back().Read(extractor, process, addr, relative_string_base_addr,
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
index d6d2df2..60f9893 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
@@ -131,7 +131,7 @@ private:
};
AppleObjCDeclVendor::AppleObjCDeclVendor(ObjCLanguageRuntime &runtime)
- : ClangDeclVendor(eAppleObjCDeclVendor), m_runtime(runtime),
+ : DeclVendor(eAppleObjCDeclVendor), m_runtime(runtime),
m_type_realizer_sp(m_runtime.GetEncodingToType()) {
m_ast_ctx = std::make_shared<TypeSystemClang>(
"AppleObjCDeclVendor AST",
@@ -537,83 +537,75 @@ uint32_t AppleObjCDeclVendor::FindDecls(ConstString name, bool append,
if (!append)
decls.clear();
- uint32_t ret = 0;
+ // See if the type is already in our ASTContext.
- do {
- // See if the type is already in our ASTContext.
-
- clang::ASTContext &ast_ctx = m_ast_ctx->getASTContext();
-
- clang::IdentifierInfo &identifier_info =
- ast_ctx.Idents.get(name.GetStringRef());
- clang::DeclarationName decl_name =
- ast_ctx.DeclarationNames.getIdentifier(&identifier_info);
-
- clang::DeclContext::lookup_result lookup_result =
- ast_ctx.getTranslationUnitDecl()->lookup(decl_name);
-
- if (!lookup_result.empty()) {
- if (clang::ObjCInterfaceDecl *result_iface_decl =
- llvm::dyn_cast<clang::ObjCInterfaceDecl>(*lookup_result.begin())) {
- if (log) {
- clang::QualType result_iface_type =
- ast_ctx.getObjCInterfaceType(result_iface_decl);
-
- uint64_t isa_value = LLDB_INVALID_ADDRESS;
- if (std::optional<ClangASTMetadata> metadata =
- m_ast_ctx->GetMetadata(result_iface_decl))
- isa_value = metadata->GetISAPtr();
-
- LLDB_LOGF(log,
- "AOCTV::FT Found %s (isa 0x%" PRIx64 ") in the ASTContext",
- result_iface_type.getAsString().data(), isa_value);
- }
+ clang::ASTContext &ast_ctx = m_ast_ctx->getASTContext();
- decls.push_back(m_ast_ctx->GetCompilerDecl(result_iface_decl));
- ret++;
- break;
- } else {
- LLDB_LOGF(log, "AOCTV::FT There's something in the ASTContext, but "
- "it's not something we know about");
- break;
+ clang::IdentifierInfo &identifier_info =
+ ast_ctx.Idents.get(name.GetStringRef());
+ clang::DeclarationName decl_name =
+ ast_ctx.DeclarationNames.getIdentifier(&identifier_info);
+
+ clang::DeclContext::lookup_result lookup_result =
+ ast_ctx.getTranslationUnitDecl()->lookup(decl_name);
+
+ if (!lookup_result.empty()) {
+ if (clang::ObjCInterfaceDecl *result_iface_decl =
+ llvm::dyn_cast<clang::ObjCInterfaceDecl>(*lookup_result.begin())) {
+ if (log) {
+ clang::QualType result_iface_type =
+ ast_ctx.getObjCInterfaceType(result_iface_decl);
+
+ uint64_t isa_value = LLDB_INVALID_ADDRESS;
+ if (std::optional<ClangASTMetadata> metadata =
+ m_ast_ctx->GetMetadata(result_iface_decl))
+ isa_value = metadata->GetISAPtr();
+
+ LLDB_LOGF(log,
+ "AOCTV::FT Found %s (isa 0x%" PRIx64 ") in the ASTContext",
+ result_iface_type.getAsString().data(), isa_value);
}
- } else if (log) {
- LLDB_LOGF(log, "AOCTV::FT Couldn't find %s in the ASTContext",
- name.AsCString());
+
+ decls.push_back(m_ast_ctx->GetCompilerDecl(result_iface_decl));
+ return 1;
}
- // It's not. If it exists, we have to put it into our ASTContext.
+ LLDB_LOGF(log, "AOCTV::FT There's something in the ASTContext, but "
+ "it's not something we know about");
+ return 0;
+ }
- ObjCLanguageRuntime::ObjCISA isa = m_runtime.GetISA(name);
+ LLDB_LOGF(log, "AOCTV::FT Couldn't find %s in the ASTContext",
+ name.AsCString());
- if (!isa) {
- LLDB_LOGF(log, "AOCTV::FT Couldn't find the isa");
+ // It's not. If it exists, we have to put it into our ASTContext.
- break;
- }
+ ObjCLanguageRuntime::ObjCISA isa = m_runtime.GetISA(name);
- clang::ObjCInterfaceDecl *iface_decl = GetDeclForISA(isa);
+ if (!isa) {
+ LLDB_LOGF(log, "AOCTV::FT Couldn't find the isa");
- if (!iface_decl) {
- LLDB_LOGF(log,
- "AOCTV::FT Couldn't get the Objective-C interface for "
- "isa 0x%" PRIx64,
- (uint64_t)isa);
+ return 0;
+ }
- break;
- }
+ clang::ObjCInterfaceDecl *iface_decl = GetDeclForISA(isa);
- if (log) {
- clang::QualType new_iface_type = ast_ctx.getObjCInterfaceType(iface_decl);
+ if (!iface_decl) {
+ LLDB_LOGF(log,
+ "AOCTV::FT Couldn't get the Objective-C interface for "
+ "isa 0x%" PRIx64,
+ (uint64_t)isa);
- LLDB_LOG(log, "AOCTV::FT Created {0} (isa 0x{1:x})",
- new_iface_type.getAsString(), (uint64_t)isa);
- }
+ return 0;
+ }
- decls.push_back(m_ast_ctx->GetCompilerDecl(iface_decl));
- ret++;
- break;
- } while (false);
+ if (log) {
+ clang::QualType new_iface_type = ast_ctx.getObjCInterfaceType(iface_decl);
+
+ LLDB_LOG(log, "AOCTV::FT Created {0} (isa 0x{1:x})",
+ new_iface_type.getAsString(), (uint64_t)isa);
+ }
- return ret;
+ decls.push_back(m_ast_ctx->GetCompilerDecl(iface_decl));
+ return 1;
}
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h
index 3bb0f77..2cfa86d 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h
@@ -11,15 +11,15 @@
#include "lldb/lldb-private.h"
-#include "Plugins/ExpressionParser/Clang/ClangDeclVendor.h"
#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
+#include "lldb/Symbol/DeclVendor.h"
namespace lldb_private {
class AppleObjCExternalASTSource;
-class AppleObjCDeclVendor : public ClangDeclVendor {
+class AppleObjCDeclVendor : public DeclVendor {
public:
AppleObjCDeclVendor(ObjCLanguageRuntime &runtime);
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 3176852..fb9e7eb 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -1971,6 +1971,49 @@ size_t Process::ReadMemory(addr_t addr, void *buf, size_t size, Status &error) {
}
}
+llvm::SmallVector<llvm::MutableArrayRef<uint8_t>>
+Process::ReadMemoryRanges(llvm::ArrayRef<Range<lldb::addr_t, size_t>> ranges,
+ llvm::MutableArrayRef<uint8_t> buffer) {
+ auto total_ranges_len = llvm::sum_of(
+ llvm::map_range(ranges, [](auto range) { return range.size; }));
+ // If the buffer is not large enough, this is a programmer error.
+ // In production builds, gracefully fail by returning a length of 0 for all
+ // ranges.
+ assert(buffer.size() >= total_ranges_len && "provided buffer is too short");
+ if (buffer.size() < total_ranges_len) {
+ llvm::MutableArrayRef<uint8_t> empty;
+ return {ranges.size(), empty};
+ }
+
+ llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> results;
+
+ // While `buffer` has space, take the next requested range and read
+ // memory into a `buffer` piece, then slice it to remove the used memory.
+ for (auto [addr, range_len] : ranges) {
+ Status status;
+ size_t num_bytes_read =
+ ReadMemoryFromInferior(addr, buffer.data(), range_len, status);
+ // FIXME: ReadMemoryFromInferior promises to return 0 in case of errors, but
+ // it doesn't; it never checks for errors.
+ if (status.Fail())
+ num_bytes_read = 0;
+
+ assert(num_bytes_read <= range_len && "read more than requested bytes");
+ if (num_bytes_read > range_len) {
+ // In production builds, gracefully fail by returning length zero for this
+ // range.
+ results.emplace_back();
+ continue;
+ }
+
+ results.push_back(buffer.take_front(num_bytes_read));
+ // Slice buffer to remove the used memory.
+ buffer = buffer.drop_front(num_bytes_read);
+ }
+
+ return results;
+}
+
void Process::DoFindInMemory(lldb::addr_t start_addr, lldb::addr_t end_addr,
const uint8_t *buf, size_t size,
AddressRanges &matches, size_t alignment,
diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt
index b1ace62..e3bffbc 100644
--- a/lldb/test/API/CMakeLists.txt
+++ b/lldb/test/API/CMakeLists.txt
@@ -140,7 +140,12 @@ if(CMAKE_HOST_APPLE)
endif()
if(WIN32 AND CMAKE_BUILD_TYPE STREQUAL Debug)
- set(LLDB_PYTHON_API_TEST_EXECUTABLE "${Python3_EXECUTABLE_DEBUG}")
+ if(${CMAKE_VERSION} VERSION_LESS "3.30")
+    message(WARNING "CMake version is older than 3.30. Some lldb tests will fail.")
+ set(LLDB_PYTHON_API_TEST_EXECUTABLE "${Python3_EXECUTABLE}")
+ else()
+ set(LLDB_PYTHON_API_TEST_EXECUTABLE "${Python3_EXECUTABLE_DEBUG}")
+ endif()
else()
set(LLDB_PYTHON_API_TEST_EXECUTABLE "${Python3_EXECUTABLE}")
endif()
diff --git a/lldb/unittests/Target/MemoryTest.cpp b/lldb/unittests/Target/MemoryTest.cpp
index 4a96730..f7b4e97 100644
--- a/lldb/unittests/Target/MemoryTest.cpp
+++ b/lldb/unittests/Target/MemoryTest.cpp
@@ -17,6 +17,7 @@
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/DataBufferHeap.h"
#include "gtest/gtest.h"
+#include <cstdint>
using namespace lldb_private;
using namespace lldb;
@@ -225,3 +226,144 @@ TEST_F(MemoryTest, TesetMemoryCacheRead) {
// instead of using an
// old cache
}
+
+/// A process class that, when asked to read memory from some address X, returns
+/// the least significant byte of X.
+class DummyReaderProcess : public Process {
+public:
+ // If true, `DoReadMemory` will not return all requested bytes.
+ // It's not possible to control exactly how many bytes will be read, because
+ // Process::ReadMemoryFromInferior tries to fulfill the entire request by
+ // reading smaller chunks until it gets nothing back.
+ bool read_less_than_requested = false;
+ bool read_more_than_requested = false;
+
+ size_t DoReadMemory(lldb::addr_t vm_addr, void *buf, size_t size,
+ Status &error) override {
+    if (read_less_than_requested && size > 0)
+      size--;
+    uint8_t *buffer = static_cast<uint8_t *>(buf);
+    for (addr_t addr = vm_addr; addr < vm_addr + size; addr++)
+      buffer[addr - vm_addr] = static_cast<uint8_t>(addr); // LSB of addr.
+    if (read_more_than_requested)
+      size *= 2; // Over-report the size, but never write past `buf`.
+    return size;
+ }
+ // Boilerplate, nothing interesting below.
+ DummyReaderProcess(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp)
+ : Process(target_sp, listener_sp) {}
+ bool CanDebug(lldb::TargetSP, bool) override { return true; }
+ Status DoDestroy() override { return {}; }
+ void RefreshStateAfterStop() override {}
+ bool DoUpdateThreadList(ThreadList &, ThreadList &) override { return false; }
+ llvm::StringRef GetPluginName() override { return "Dummy"; }
+};
+
+TEST_F(MemoryTest, TestReadMemoryRanges) {
+ ArchSpec arch("x86_64-apple-macosx-");
+
+ Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch));
+
+ DebuggerSP debugger_sp = Debugger::CreateInstance();
+ ASSERT_TRUE(debugger_sp);
+
+ TargetSP target_sp = CreateTarget(debugger_sp, arch);
+ ASSERT_TRUE(target_sp);
+
+ ListenerSP listener_sp(Listener::MakeListener("dummy"));
+ ProcessSP process_sp =
+ std::make_shared<DummyReaderProcess>(target_sp, listener_sp);
+ ASSERT_TRUE(process_sp);
+
+ {
+ llvm::SmallVector<uint8_t, 0> buffer(1024, 0);
+ // Read 8 ranges of 128 bytes with arbitrary base addresses.
+ llvm::SmallVector<Range<addr_t, size_t>> ranges = {
+ {0x12345, 128}, {0x11112222, 128}, {0x77777777, 128},
+ {0xffaabbccdd, 128}, {0x0, 128}, {0x4242424242, 128},
+ {0x17171717, 128}, {0x99999, 128}};
+
+ llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results =
+ process_sp->ReadMemoryRanges(ranges, buffer);
+
+ for (auto [range, memory] : llvm::zip(ranges, read_results)) {
+ ASSERT_EQ(memory.size(), 128u);
+ addr_t range_base = range.GetRangeBase();
+ for (auto [idx, byte] : llvm::enumerate(memory))
+ ASSERT_EQ(byte, static_cast<uint8_t>(range_base + idx));
+ }
+ }
+
+ auto &dummy_process = static_cast<DummyReaderProcess &>(*process_sp);
+ dummy_process.read_less_than_requested = true;
+ {
+ llvm::SmallVector<uint8_t, 0> buffer(1024, 0);
+ llvm::SmallVector<Range<addr_t, size_t>> ranges = {
+ {0x12345, 128}, {0x11112222, 128}, {0x77777777, 128}};
+ llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results =
+ dummy_process.ReadMemoryRanges(ranges, buffer);
+ for (auto [range, memory] : llvm::zip(ranges, read_results)) {
+ ASSERT_LT(memory.size(), 128u);
+ addr_t range_base = range.GetRangeBase();
+ for (auto [idx, byte] : llvm::enumerate(memory))
+ ASSERT_EQ(byte, static_cast<uint8_t>(range_base + idx));
+ }
+ }
+}
+
+using MemoryDeathTest = MemoryTest;
+
+TEST_F(MemoryDeathTest, TestReadMemoryRangesReturnsTooMuch) {
+ ArchSpec arch("x86_64-apple-macosx-");
+ Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch));
+ DebuggerSP debugger_sp = Debugger::CreateInstance();
+ ASSERT_TRUE(debugger_sp);
+ TargetSP target_sp = CreateTarget(debugger_sp, arch);
+ ASSERT_TRUE(target_sp);
+ ListenerSP listener_sp(Listener::MakeListener("dummy"));
+ ProcessSP process_sp =
+ std::make_shared<DummyReaderProcess>(target_sp, listener_sp);
+ ASSERT_TRUE(process_sp);
+
+ auto &dummy_process = static_cast<DummyReaderProcess &>(*process_sp);
+ dummy_process.read_more_than_requested = true;
+ llvm::SmallVector<uint8_t, 0> buffer(1024, 0);
+ llvm::SmallVector<Range<addr_t, size_t>> ranges = {{0x12345, 128}};
+
+ llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results;
+ ASSERT_DEBUG_DEATH(
+ { read_results = process_sp->ReadMemoryRanges(ranges, buffer); },
+ "read more than requested bytes");
+#ifdef NDEBUG
+ // With asserts off, the read should return empty ranges.
+ ASSERT_EQ(read_results.size(), 1u);
+ ASSERT_TRUE(read_results[0].empty());
+#endif
+}
+
+TEST_F(MemoryDeathTest, TestReadMemoryRangesWithShortBuffer) {
+ ArchSpec arch("x86_64-apple-macosx-");
+ Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch));
+ DebuggerSP debugger_sp = Debugger::CreateInstance();
+ ASSERT_TRUE(debugger_sp);
+ TargetSP target_sp = CreateTarget(debugger_sp, arch);
+ ASSERT_TRUE(target_sp);
+ ListenerSP listener_sp(Listener::MakeListener("dummy"));
+ ProcessSP process_sp =
+ std::make_shared<DummyReaderProcess>(target_sp, listener_sp);
+ ASSERT_TRUE(process_sp);
+
+ llvm::SmallVector<uint8_t, 0> short_buffer(10, 0);
+ llvm::SmallVector<Range<addr_t, size_t>> ranges = {{0x12345, 128},
+ {0x11, 128}};
+ llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results;
+ ASSERT_DEBUG_DEATH(
+ { read_results = process_sp->ReadMemoryRanges(ranges, short_buffer); },
+ "provided buffer is too short");
+#ifdef NDEBUG
+ // With asserts off, the read should return empty ranges.
+ ASSERT_EQ(read_results.size(), ranges.size());
+ for (llvm::MutableArrayRef<uint8_t> result : read_results)
+ ASSERT_TRUE(result.empty());
+#endif
+}
diff --git a/llvm/docs/CommandGuide/llvm-dwarfdump.rst b/llvm/docs/CommandGuide/llvm-dwarfdump.rst
index 27ad4226..1378302 100644
--- a/llvm/docs/CommandGuide/llvm-dwarfdump.rst
+++ b/llvm/docs/CommandGuide/llvm-dwarfdump.rst
@@ -83,7 +83,7 @@ OPTIONS
.. option:: -n <name>, --name=<name>
Find and print all debug info entries whose name
- (`DW_AT_name` attribute) is <name>.
+ (`DW_AT_name`/`DW_AT_linkage_name` attribute) is <name>.
.. option:: --lookup=<address>
diff --git a/llvm/include/llvm/ADT/Twine.h b/llvm/include/llvm/ADT/Twine.h
index d9f9c0f..e3b4d5e 100644
--- a/llvm/include/llvm/ADT/Twine.h
+++ b/llvm/include/llvm/ADT/Twine.h
@@ -285,7 +285,7 @@ public:
}
/// Construct from a StringRef.
- /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) {
+ /*implicit*/ Twine(StringRef Str) : LHSKind(PtrAndLengthKind) {
LHS.ptrAndLength.ptr = Str.data();
LHS.ptrAndLength.length = Str.size();
assert(isValid() && "Invalid twine!");
@@ -352,7 +352,7 @@ public:
// right thing. Yet.
/// Construct as the concatenation of a C string and a StringRef.
- /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
+ /*implicit*/ Twine(const char *LHS, StringRef RHS)
: LHSKind(CStringKind), RHSKind(PtrAndLengthKind) {
this->LHS.cString = LHS;
this->RHS.ptrAndLength.ptr = RHS.data();
@@ -361,7 +361,7 @@ public:
}
/// Construct as the concatenation of a StringRef and a C string.
- /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
+ /*implicit*/ Twine(StringRef LHS, const char *RHS)
: LHSKind(PtrAndLengthKind), RHSKind(CStringKind) {
this->LHS.ptrAndLength.ptr = LHS.data();
this->LHS.ptrAndLength.length = LHS.size();
@@ -530,14 +530,14 @@ inline Twine operator+(const Twine &LHS, const Twine &RHS) {
/// Additional overload to guarantee simplified codegen; this is equivalent to
/// concat().
-inline Twine operator+(const char *LHS, const StringRef &RHS) {
+inline Twine operator+(const char *LHS, StringRef RHS) {
return Twine(LHS, RHS);
}
/// Additional overload to guarantee simplified codegen; this is equivalent to
/// concat().
-inline Twine operator+(const StringRef &LHS, const char *RHS) {
+inline Twine operator+(StringRef LHS, const char *RHS) {
return Twine(LHS, RHS);
}
diff --git a/llvm/include/llvm/CodeGen/MIR2Vec.h b/llvm/include/llvm/CodeGen/MIR2Vec.h
index 7b1b5d9..f6b0571 100644
--- a/llvm/include/llvm/CodeGen/MIR2Vec.h
+++ b/llvm/include/llvm/CodeGen/MIR2Vec.h
@@ -52,11 +52,21 @@ class LLVMContext;
class MIR2VecVocabLegacyAnalysis;
class TargetInstrInfo;
+enum class MIR2VecKind { Symbolic };
+
namespace mir2vec {
+
+// Forward declarations
+class MIREmbedder;
+class SymbolicMIREmbedder;
+
extern llvm::cl::OptionCategory MIR2VecCategory;
extern cl::opt<float> OpcWeight;
using Embedding = ir2vec::Embedding;
+using MachineInstEmbeddingsMap = DenseMap<const MachineInstr *, Embedding>;
+using MachineBlockEmbeddingsMap =
+ DenseMap<const MachineBasicBlock *, Embedding>;
/// Class for storing and accessing the MIR2Vec vocabulary.
/// The MIRVocabulary class manages seed embeddings for LLVM Machine IR
@@ -107,19 +117,91 @@ public:
const_iterator end() const { return Storage.end(); }
- /// Total number of entries in the vocabulary
- size_t getCanonicalSize() const { return Storage.size(); }
-
MIRVocabulary() = delete;
/// Factory method to create MIRVocabulary from vocabulary map
static Expected<MIRVocabulary> create(VocabMap &&Entries,
const TargetInstrInfo &TII);
+ /// Create a dummy vocabulary for testing purposes.
+ static Expected<MIRVocabulary>
+ createDummyVocabForTest(const TargetInstrInfo &TII, unsigned Dim = 1);
+
+ /// Total number of entries in the vocabulary
+ size_t getCanonicalSize() const { return Storage.size(); }
+
private:
MIRVocabulary(VocabMap &&Entries, const TargetInstrInfo &TII);
};
+/// Base class for MIR embedders
+class MIREmbedder {
+protected:
+ const MachineFunction &MF;
+ const MIRVocabulary &Vocab;
+
+ /// Dimension of the embeddings; Captured from the vocabulary
+ const unsigned Dimension;
+
+ /// Weight for opcode embeddings
+ const float OpcWeight;
+
+ MIREmbedder(const MachineFunction &MF, const MIRVocabulary &Vocab)
+ : MF(MF), Vocab(Vocab), Dimension(Vocab.getDimension()),
+ OpcWeight(mir2vec::OpcWeight) {}
+
+ /// Function to compute embeddings.
+ Embedding computeEmbeddings() const;
+
+ /// Function to compute the embedding for a given machine basic block.
+ Embedding computeEmbeddings(const MachineBasicBlock &MBB) const;
+
+ /// Function to compute the embedding for a given machine instruction.
+ /// Specific to the kind of embeddings being computed.
+ virtual Embedding computeEmbeddings(const MachineInstr &MI) const = 0;
+
+public:
+ virtual ~MIREmbedder() = default;
+
+ /// Factory method to create an Embedder object of the specified kind
+ /// Returns nullptr if the requested kind is not supported.
+ static std::unique_ptr<MIREmbedder> create(MIR2VecKind Mode,
+ const MachineFunction &MF,
+ const MIRVocabulary &Vocab);
+
+ /// Computes and returns the embedding for a given machine instruction MI in
+ /// the machine function MF.
+ Embedding getMInstVector(const MachineInstr &MI) const {
+ return computeEmbeddings(MI);
+ }
+
+ /// Computes and returns the embedding for a given machine basic block in the
+ /// machine function MF.
+ Embedding getMBBVector(const MachineBasicBlock &MBB) const {
+ return computeEmbeddings(MBB);
+ }
+
+ /// Computes and returns the embedding for the current machine function.
+ Embedding getMFunctionVector() const {
+ // Currently, we always (re)compute the embeddings for the function. This is
+ // cheaper than caching the vector.
+ return computeEmbeddings();
+ }
+};
+
+/// Class for computing Symbolic embeddings
+/// Symbolic embeddings are constructed based on the entity-level
+/// representations obtained from the MIR Vocabulary.
+class SymbolicMIREmbedder : public MIREmbedder {
+private:
+ Embedding computeEmbeddings(const MachineInstr &MI) const override;
+
+public:
+ SymbolicMIREmbedder(const MachineFunction &F, const MIRVocabulary &Vocab);
+ static std::unique_ptr<SymbolicMIREmbedder>
+ create(const MachineFunction &MF, const MIRVocabulary &Vocab);
+};
+
} // namespace mir2vec
/// Pass to analyze and populate MIR2Vec vocabulary from a module
@@ -166,6 +248,31 @@ public:
}
};
+/// This pass prints the MIR2Vec embeddings for machine functions, basic blocks,
+/// and instructions
+class MIR2VecPrinterLegacyPass : public MachineFunctionPass {
+ raw_ostream &OS;
+
+public:
+ static char ID;
+ explicit MIR2VecPrinterLegacyPass(raw_ostream &OS)
+ : MachineFunctionPass(ID), OS(OS) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MIR2VecVocabLegacyAnalysis>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return "MIR2Vec Embedder Printer Pass";
+ }
+};
+
+/// Create a machine pass that prints MIR2Vec embeddings
+MachineFunctionPass *createMIR2VecPrinterLegacyPass(raw_ostream &OS);
+
} // namespace llvm
#endif // LLVM_CODEGEN_MIR2VEC_H \ No newline at end of file
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 272b4ac..7fae550 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -93,6 +93,10 @@ createMachineFunctionPrinterPass(raw_ostream &OS,
LLVM_ABI MachineFunctionPass *
createMIR2VecVocabPrinterLegacyPass(raw_ostream &OS);
+/// MIR2VecPrinter pass - This pass prints out the MIR2Vec embeddings for
+/// machine functions, basic blocks and instructions.
+LLVM_ABI MachineFunctionPass *createMIR2VecPrinterLegacyPass(raw_ostream &OS);
+
/// StackFramePrinter pass - This pass prints out the machine function's
/// stack frame to the given stream as a debugging tool.
LLVM_ABI MachineFunctionPass *createStackFrameLayoutAnalysisPass();
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 73f2c55..64a7563 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2459,6 +2459,12 @@ public:
return ISD::ANY_EXTEND;
}
+ /// Returns how the platform's atomic rmw operations expect their input
+ /// argument to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND).
+ virtual ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const {
+ return ISD::ANY_EXTEND;
+ }
+
/// @}
/// Returns true if we should normalize
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 12d1c25..e6cce9a4 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2851,7 +2851,15 @@ def int_ptrauth_blend :
def int_ptrauth_sign_generic :
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+//===----------------- AllocToken Intrinsics ------------------------------===//
+
+// Return the token ID for the given !alloc_token metadata.
+def int_alloc_token_id :
+ DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
+ [IntrNoMem, NoUndef<RetIndex>]>;
+
//===----------------------------------------------------------------------===//
+
//===------- Convergence Intrinsics ---------------------------------------===//
def int_experimental_convergence_entry
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h
index ada3523..0135989 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.h
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.h
@@ -236,7 +236,7 @@ private:
static bool hasAEABILibcalls(const Triple &TT) {
return TT.isTargetAEABI() || TT.isTargetGNUAEABI() ||
- TT.isTargetMuslAEABI() || TT.isAndroid();
+ TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isAndroid();
}
LLVM_READONLY
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index cd774e7..d507ba2 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -222,6 +222,7 @@ LLVM_ABI void
initializeMachineSanitizerBinaryMetadataLegacyPass(PassRegistry &);
LLVM_ABI void initializeMIR2VecVocabLegacyAnalysisPass(PassRegistry &);
LLVM_ABI void initializeMIR2VecVocabPrinterLegacyPassPass(PassRegistry &);
+LLVM_ABI void initializeMIR2VecPrinterLegacyPassPass(PassRegistry &);
LLVM_ABI void initializeMachineSchedulerLegacyPass(PassRegistry &);
LLVM_ABI void initializeMachineSinkingLegacyPass(PassRegistry &);
LLVM_ABI void initializeMachineTraceMetricsWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index dc8cd86d..5e43444 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -935,7 +935,8 @@ public:
getEnvironment() == Triple::GNUEABIHF ||
getEnvironment() == Triple::GNUEABIHFT64 ||
getEnvironment() == Triple::OpenHOS ||
- getEnvironment() == Triple::MuslEABIHF || isAndroid()) &&
+ getEnvironment() == Triple::MuslEABIHF || isOSFuchsia() ||
+ isAndroid()) &&
isOSBinFormatELF() && !isOSNetBSD();
}
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 8da51d0..b573023 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4866,6 +4866,89 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
return nullptr;
}
+/// Look for the following pattern and simplify %to_fold to %identicalPhi.
+/// Here %phi, %to_fold and %phi.next perform the same functionality as
+/// %identicalPhi and hence the select instruction %to_fold can be folded
+/// into %identicalPhi.
+///
+/// BB1:
+/// %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
+/// %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
+/// ...
+/// %identicalPhi.next = select %cmp, %val, %identicalPhi
+/// (or select %cmp, %identicalPhi, %val)
+/// %to_fold = select %cmp2, %identicalPhi, %phi
+/// %phi.next = select %cmp, %val, %to_fold
+/// (or select %cmp, %to_fold, %val)
+///
+/// Prove that %phi and %identicalPhi are the same by induction:
+///
+/// Base case: Both %phi and %identicalPhi are equal on entry to the loop.
+/// Inductive case:
+/// Suppose %phi and %identicalPhi are equal at iteration i.
+/// We look at their values at iteration i+1 which are %phi.next and
+/// %identicalPhi.next. They would have become different only when %cmp is
+/// false and the corresponding values %to_fold and %identicalPhi differ
+/// (similar reason for the other "or" case in the bracket).
+///
+/// The only condition when %to_fold and %identicalPhi could differ is when %cmp2
+/// is false and %to_fold is %phi, which contradicts our inductive hypothesis
+/// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
+/// always equal at iteration i+1.
+static bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) {
+ if (PN.getParent() != IdenticalPN.getParent())
+ return false;
+ if (PN.getNumIncomingValues() != 2)
+ return false;
+
+ // Check that only the backedge incoming value is different.
+ unsigned DiffVals = 0;
+ BasicBlock *DiffValBB = nullptr;
+ for (unsigned i = 0; i < 2; i++) {
+ BasicBlock *PredBB = PN.getIncomingBlock(i);
+ if (PN.getIncomingValueForBlock(PredBB) !=
+ IdenticalPN.getIncomingValueForBlock(PredBB)) {
+ DiffVals++;
+ DiffValBB = PredBB;
+ }
+ }
+ if (DiffVals != 1)
+ return false;
+ // Now check that the backedge incoming values are two select
+ // instructions with the same condition. Either their true
+ // values are the same, or their false values are the same.
+ auto *SI = dyn_cast<SelectInst>(PN.getIncomingValueForBlock(DiffValBB));
+ auto *IdenticalSI =
+ dyn_cast<SelectInst>(IdenticalPN.getIncomingValueForBlock(DiffValBB));
+ if (!SI || !IdenticalSI)
+ return false;
+ if (SI->getCondition() != IdenticalSI->getCondition())
+ return false;
+
+ SelectInst *SIOtherVal = nullptr;
+ Value *IdenticalSIOtherVal = nullptr;
+ if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
+ SIOtherVal = dyn_cast<SelectInst>(SI->getFalseValue());
+ IdenticalSIOtherVal = IdenticalSI->getFalseValue();
+ } else if (SI->getFalseValue() == IdenticalSI->getFalseValue()) {
+ SIOtherVal = dyn_cast<SelectInst>(SI->getTrueValue());
+ IdenticalSIOtherVal = IdenticalSI->getTrueValue();
+ } else {
+ return false;
+ }
+
+ // Now check that the other values in select, i.e., %to_fold and
+ // %identicalPhi, are essentially the same value.
+ if (!SIOtherVal || IdenticalSIOtherVal != &IdenticalPN)
+ return false;
+ if (!(SIOtherVal->getTrueValue() == &IdenticalPN &&
+ SIOtherVal->getFalseValue() == &PN) &&
+ !(SIOtherVal->getTrueValue() == &PN &&
+ SIOtherVal->getFalseValue() == &IdenticalPN))
+ return false;
+ return true;
+}
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -5041,7 +5124,14 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
std::optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
if (Imp)
return *Imp ? TrueVal : FalseVal;
-
+ // Look for same PHIs in the true and false values.
+ if (auto *TruePHI = dyn_cast<PHINode>(TrueVal))
+ if (auto *FalsePHI = dyn_cast<PHINode>(FalseVal)) {
+ if (isSimplifierIdenticalPHI(*TruePHI, *FalsePHI))
+ return FalseVal;
+ if (isSimplifierIdenticalPHI(*FalsePHI, *TruePHI))
+ return TrueVal;
+ }
return nullptr;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e2af0c5..a114406 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1438,7 +1438,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
BBFreqEnabled,
BrProbEnabled,
MF.hasBBSections() && NumMBBSectionRanges > 1,
- static_cast<bool>(BBAddrMapSkipEmitBBEntries),
+ BBAddrMapSkipEmitBBEntries,
HasCalls,
false};
}
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index c438eae..9795a0b 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -98,6 +98,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineUniformityAnalysisPassPass(Registry);
initializeMIR2VecVocabLegacyAnalysisPass(Registry);
initializeMIR2VecVocabPrinterLegacyPassPass(Registry);
+ initializeMIR2VecPrinterLegacyPassPass(Registry);
initializeMachineUniformityInfoPrinterPassPass(Registry);
initializeMachineVerifierLegacyPassPass(Registry);
initializeObjCARCContractLegacyPassPass(Registry);
diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp
index 5c78d98..99be1fc0 100644
--- a/llvm/lib/CodeGen/MIR2Vec.cpp
+++ b/llvm/lib/CodeGen/MIR2Vec.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MIR2Vec.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Module.h"
@@ -29,20 +30,30 @@ using namespace mir2vec;
STATISTIC(MIRVocabMissCounter,
"Number of lookups to MIR entities not present in the vocabulary");
-cl::OptionCategory llvm::mir2vec::MIR2VecCategory("MIR2Vec Options");
+namespace llvm {
+namespace mir2vec {
+cl::OptionCategory MIR2VecCategory("MIR2Vec Options");
// FIXME: Use a default vocab when not specified
static cl::opt<std::string>
VocabFile("mir2vec-vocab-path", cl::Optional,
cl::desc("Path to the vocabulary file for MIR2Vec"), cl::init(""),
cl::cat(MIR2VecCategory));
-cl::opt<float>
- llvm::mir2vec::OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
- cl::desc("Weight for machine opcode embeddings"),
- cl::cat(MIR2VecCategory));
+cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
+ cl::desc("Weight for machine opcode embeddings"),
+ cl::cat(MIR2VecCategory));
+cl::opt<MIR2VecKind> MIR2VecEmbeddingKind(
+ "mir2vec-kind", cl::Optional,
+ cl::values(clEnumValN(MIR2VecKind::Symbolic, "symbolic",
+ "Generate symbolic embeddings for MIR")),
+ cl::init(MIR2VecKind::Symbolic), cl::desc("MIR2Vec embedding kind"),
+ cl::cat(MIR2VecCategory));
+
+} // namespace mir2vec
+} // namespace llvm
//===----------------------------------------------------------------------===//
-// Vocabulary Implementation
+// Vocabulary
//===----------------------------------------------------------------------===//
MIRVocabulary::MIRVocabulary(VocabMap &&OpcodeEntries,
@@ -188,6 +199,28 @@ void MIRVocabulary::buildCanonicalOpcodeMapping() {
<< " unique base opcodes\n");
}
+Expected<MIRVocabulary>
+MIRVocabulary::createDummyVocabForTest(const TargetInstrInfo &TII,
+ unsigned Dim) {
+ assert(Dim > 0 && "Dimension must be greater than zero");
+
+ float DummyVal = 0.1f;
+
+ // Create dummy embeddings for all canonical opcode names
+ VocabMap DummyVocabMap;
+ for (unsigned Opcode = 0; Opcode < TII.getNumOpcodes(); ++Opcode) {
+ std::string BaseOpcode = extractBaseOpcodeName(TII.getName(Opcode));
+ if (DummyVocabMap.count(BaseOpcode) == 0) {
+ // Only add if not already present
+ DummyVocabMap[BaseOpcode] = Embedding(Dim, DummyVal);
+ DummyVal += 0.1f;
+ }
+ }
+
+ // Create and return vocabulary with dummy embeddings
+ return MIRVocabulary::create(std::move(DummyVocabMap), TII);
+}
+
//===----------------------------------------------------------------------===//
// MIR2VecVocabLegacyAnalysis Implementation
//===----------------------------------------------------------------------===//
@@ -258,7 +291,73 @@ MIR2VecVocabLegacyAnalysis::getMIR2VecVocabulary(const Module &M) {
}
//===----------------------------------------------------------------------===//
-// Printer Passes Implementation
+// MIREmbedder and its subclasses
+//===----------------------------------------------------------------------===//
+
+std::unique_ptr<MIREmbedder> MIREmbedder::create(MIR2VecKind Mode,
+ const MachineFunction &MF,
+ const MIRVocabulary &Vocab) {
+ switch (Mode) {
+ case MIR2VecKind::Symbolic:
+ return std::make_unique<SymbolicMIREmbedder>(MF, Vocab);
+ }
+ return nullptr;
+}
+
+Embedding MIREmbedder::computeEmbeddings(const MachineBasicBlock &MBB) const {
+ Embedding MBBVector(Dimension, 0);
+
+ // Get instruction info for opcode name resolution
+ const auto &Subtarget = MF.getSubtarget();
+ const auto *TII = Subtarget.getInstrInfo();
+ if (!TII) {
+ MF.getFunction().getContext().emitError(
+ "MIR2Vec: No TargetInstrInfo available; cannot compute embeddings");
+ return MBBVector;
+ }
+
+ // Process each machine instruction in the basic block
+ for (const auto &MI : MBB) {
+ // Skip debug instructions and other metadata
+ if (MI.isDebugInstr())
+ continue;
+ MBBVector += computeEmbeddings(MI);
+ }
+
+ return MBBVector;
+}
+
+Embedding MIREmbedder::computeEmbeddings() const {
+ Embedding MFuncVector(Dimension, 0);
+
+ // Consider all reachable machine basic blocks in the function
+ for (const auto *MBB : depth_first(&MF))
+ MFuncVector += computeEmbeddings(*MBB);
+ return MFuncVector;
+}
+
+SymbolicMIREmbedder::SymbolicMIREmbedder(const MachineFunction &MF,
+ const MIRVocabulary &Vocab)
+ : MIREmbedder(MF, Vocab) {}
+
+std::unique_ptr<SymbolicMIREmbedder>
+SymbolicMIREmbedder::create(const MachineFunction &MF,
+ const MIRVocabulary &Vocab) {
+ return std::make_unique<SymbolicMIREmbedder>(MF, Vocab);
+}
+
+Embedding SymbolicMIREmbedder::computeEmbeddings(const MachineInstr &MI) const {
+ // Skip debug instructions and other metadata
+ if (MI.isDebugInstr())
+ return Embedding(Dimension, 0);
+
+ // Todo: Add operand/argument contributions
+
+ return Vocab[MI.getOpcode()];
+}
+
+//===----------------------------------------------------------------------===//
+// Printer Passes
//===----------------------------------------------------------------------===//
char MIR2VecVocabPrinterLegacyPass::ID = 0;
@@ -297,3 +396,56 @@ MachineFunctionPass *
llvm::createMIR2VecVocabPrinterLegacyPass(raw_ostream &OS) {
return new MIR2VecVocabPrinterLegacyPass(OS);
}
+
+char MIR2VecPrinterLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(MIR2VecPrinterLegacyPass, "print-mir2vec",
+ "MIR2Vec Embedder Printer Pass", false, true)
+INITIALIZE_PASS_DEPENDENCY(MIR2VecVocabLegacyAnalysis)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(MIR2VecPrinterLegacyPass, "print-mir2vec",
+ "MIR2Vec Embedder Printer Pass", false, true)
+
+bool MIR2VecPrinterLegacyPass::runOnMachineFunction(MachineFunction &MF) {
+ auto &Analysis = getAnalysis<MIR2VecVocabLegacyAnalysis>();
+ auto VocabOrErr =
+ Analysis.getMIR2VecVocabulary(*MF.getFunction().getParent());
+ assert(VocabOrErr && "Failed to get MIR2Vec vocabulary");
+ auto &MIRVocab = *VocabOrErr;
+
+ auto Emb = mir2vec::MIREmbedder::create(MIR2VecEmbeddingKind, MF, MIRVocab);
+ if (!Emb) {
+ OS << "Error creating MIR2Vec embeddings for function " << MF.getName()
+ << "\n";
+ return false;
+ }
+
+ OS << "MIR2Vec embeddings for machine function " << MF.getName() << ":\n";
+ OS << "Machine Function vector: ";
+ Emb->getMFunctionVector().print(OS);
+
+ OS << "Machine basic block vectors:\n";
+ for (const MachineBasicBlock &MBB : MF) {
+ OS << "Machine basic block: " << MBB.getFullName() << ":\n";
+ Emb->getMBBVector(MBB).print(OS);
+ }
+
+ OS << "Machine instruction vectors:\n";
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ // Skip debug instructions as they are not
+ // embedded
+ if (MI.isDebugInstr())
+ continue;
+
+ OS << "Machine instruction: ";
+ MI.print(OS);
+ Emb->getMInstVector(MI).print(OS);
+ }
+ }
+
+ return false;
+}
+
+MachineFunctionPass *llvm::createMIR2VecPrinterLegacyPass(raw_ostream &OS) {
+ return new MIR2VecPrinterLegacyPass(OS);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 437d0f4..bf1abfe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3765,6 +3765,8 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::LRINT:
case ISD::LLRINT:
+ case ISD::LROUND:
+ case ISD::LLROUND:
Res = SoftPromoteHalfOp_Op0WithStrict(N);
break;
case ISD::FP_TO_SINT_SAT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 88a4a8b..b1776ea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -429,7 +429,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
- SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op2 = N->getOperand(2);
+ switch (TLI.getExtendForAtomicRMWArg(N->getOpcode())) {
+ case ISD::SIGN_EXTEND:
+ Op2 = SExtPromotedInteger(Op2);
+ break;
+ case ISD::ZERO_EXTEND:
+ Op2 = ZExtPromotedInteger(Op2);
+ break;
+ case ISD::ANY_EXTEND:
+ Op2 = GetPromotedInteger(Op2);
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 42ec8ba..7cce033 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
- defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
}
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 35e1127..b1a668e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1089,7 +1089,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
// Register based DivRem for AEABI (RTABI 4.2)
if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() ||
- TT.isTargetMuslAEABI() || TT.isOSWindows()) {
+ TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isOSWindows()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
@@ -1353,6 +1353,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::LRINT, MVT::f16, Expand);
+ setOperationAction(ISD::LROUND, MVT::f16, Expand);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
@@ -20574,7 +20575,7 @@ static TargetLowering::ArgListTy getDivRemArgList(
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
- Subtarget->isTargetWindows()) &&
+ Subtarget->isTargetFuchsia() || Subtarget->isTargetWindows()) &&
"Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index b2d368e..4a0883c 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -343,6 +343,7 @@ public:
bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); }
bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); }
+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index c8866bf..42e90f0 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -294,6 +294,14 @@ public:
if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures"))
RootSignature->eraseFromParent();
+ // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and
+ // causes all tests using the DXIL Validator to fail.
+ //
+ // This is a temporary fix and should be replaced with a whitelist once
+ // we have determined all metadata that the DXIL Validator allows
+ if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa"))
+ ErrNo->eraseFromParent();
+
return true;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index a94e131..54c8972 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -117,8 +117,10 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- if (Subtarget.useHVX128BOps())
+ if (Subtarget.useHVX128BOps()) {
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
+ }
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
Subtarget.useHVXFloatingPoint()) {
@@ -2024,13 +2026,9 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
// Handle bitcast from i32, v2i16, and v4i8 to v32i1.
// Splat the input into a 32-element i32 vector, then AND each element
// with a unique bitmask to isolate individual bits.
- if (ResTy == MVT::v32i1 &&
- (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
- Subtarget.useHVX128BOps()) {
- SDValue Val32 = Val;
- if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
- Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
-
+ auto bitcastI32ToV32I1 = [&](SDValue Val32) {
+ assert(Val32.getValueType().getSizeInBits() == 32 &&
+ "Input must be 32 bits");
MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
SmallVector<SDValue, 32> Mask;
@@ -2039,7 +2037,31 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
- return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
+ return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
+ };
+ // === Case: v32i1 ===
+ if (ResTy == MVT::v32i1 &&
+ (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
+ Subtarget.useHVX128BOps()) {
+ SDValue Val32 = Val;
+ if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
+ Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
+ return bitcastI32ToV32I1(Val32);
+ }
+ // === Case: v64i1 ===
+ if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
+ // Split i64 into lo/hi 32-bit halves.
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
+ SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
+
+ // Reuse the same 32-bit logic twice.
+ SDValue LoRes = bitcastI32ToV32I1(Lo);
+ SDValue HiRes = bitcastI32ToV32I1(Hi);
+
+ // Concatenate into a v64i1 predicate.
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
}
if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index 5dd4bf4..98b636e 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -109,12 +109,70 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
// expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
+ case RISCV::PseudoAtomicSwap32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicSwap64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAdd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAdd64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadSub32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadSub64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAnd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAnd64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadOr32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
+ case RISCV::PseudoAtomicLoadOr64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64, NextMBBI);
+ case RISCV::PseudoAtomicLoadXor32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadXor64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64,
+ NextMBBI);
case RISCV::PseudoAtomicLoadNand32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
NextMBBI);
case RISCV::PseudoAtomicLoadNand64:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
NextMBBI);
+ case RISCV::PseudoAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadMin64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadMax64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMin64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMax64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64,
+ NextMBBI);
case RISCV::PseudoMaskedAtomicSwap32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
NextMBBI);
@@ -277,6 +335,36 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(IncrReg)
+ .addImm(0);
+ break;
+ case AtomicRMWInst::Add:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Sub:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::And:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Or:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Xor:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
case AtomicRMWInst::Nand:
BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
@@ -433,38 +521,85 @@ static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
.addReg(ShamtReg);
}
-bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
- MachineBasicBlock::iterator &NextMBBI) {
- assert(IsMasked == true &&
- "Should only need to expand masked atomic max/min");
- assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+static void doAtomicMinMaxOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+ MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register IncrReg = MI.getOperand(3).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
- MachineInstr &MI = *MBBI;
- DebugLoc DL = MI.getDebugLoc();
- MachineFunction *MF = MBB.getParent();
- auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ // .loophead:
+ // lr.[w|d] dest, (addr)
+ // mv scratch, dest
+ // ifnochangeneeded scratch, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(DestReg)
+ .addImm(0);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Max: {
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(ScratchReg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::Min: {
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(IncrReg)
+ .addReg(ScratchReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::UMax:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(ScratchReg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::UMin:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(IncrReg)
+ .addReg(ScratchReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
- // Insert new MBBs.
- MF->insert(++MBB.getIterator(), LoopHeadMBB);
- MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
- MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
- MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+ // .loopifbody:
+ // mv scratch, incr
+ BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(IncrReg)
+ .addImm(0);
- // Set up successors and transfer remaining instructions to DoneMBB.
- LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
- LoopHeadMBB->addSuccessor(LoopTailMBB);
- LoopIfBodyMBB->addSuccessor(LoopTailMBB);
- LoopTailMBB->addSuccessor(LoopHeadMBB);
- LoopTailMBB->addSuccessor(DoneMBB);
- DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
- DoneMBB->transferSuccessors(&MBB);
- MBB.addSuccessor(LoopHeadMBB);
+ // .looptail:
+ // sc.[w|d] scratch, scratch, (addr)
+ // bnez scratch, loop
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+ ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(AddrReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+}
+static void doMaskedAtomicMinMaxOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+ MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
Register DestReg = MI.getOperand(0).getReg();
Register Scratch1Reg = MI.getOperand(1).getReg();
Register Scratch2Reg = MI.getOperand(2).getReg();
@@ -541,6 +676,44 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
.addReg(Scratch1Reg)
.addReg(RISCV::X0)
.addMBB(LoopHeadMBB);
+}
+
+bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+ MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ if (!IsMasked)
+ doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB,
+ LoopTailMBB, DoneMBB, BinOp, Width, STI);
+ else
+ doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB,
+ LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp,
+ Width, STI);
NextMBBI = MBB.end();
MI.eraseFromParent();
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 19992e6..3abbbb3 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -218,6 +218,7 @@ def HasStdExtZaamo
: Predicate<"Subtarget->hasStdExtZaamo()">,
AssemblerPredicate<(any_of FeatureStdExtZaamo),
"'Zaamo' (Atomic Memory Operations)">;
+def NoStdExtZaamo : Predicate<"!Subtarget->hasStdExtZaamo()">;
def FeatureStdExtZalrsc
: RISCVExtension<1, 0, "Load-Reserved/Store-Conditional">;
@@ -1864,7 +1865,7 @@ def FeatureForcedAtomics : SubtargetFeature<
"forced-atomics", "HasForcedAtomics", "true",
"Assume that lock-free native-width atomics are available">;
def HasAtomicLdSt
- : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;
+ : Predicate<"Subtarget->hasStdExtZalrsc() || Subtarget->hasForcedAtomics()">;
def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"AllowTaggedGlobals",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a77d765..26fe9ed 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -688,7 +688,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
else if (Subtarget.hasStdExtZicbop())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
- if (Subtarget.hasStdExtA()) {
+ if (Subtarget.hasStdExtZalrsc()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
setMinCmpXchgSizeInBits(8);
@@ -1558,7 +1558,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget.hasStdExtA())
+ if (Subtarget.hasStdExtZaamo())
setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
if (Subtarget.hasForcedAtomics()) {
@@ -21875,7 +21875,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
// result is then sign extended to XLEN. With +A, the minimum width is
// 32 for both 64 and 32.
assert(getMinCmpXchgSizeInBits() == 32);
- assert(Subtarget.hasStdExtA());
+ assert(Subtarget.hasStdExtZalrsc());
return Op.getValueSizeInBits() - 31;
}
break;
@@ -24471,6 +24471,25 @@ ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}
+ISD::NodeType RISCVTargetLowering::getExtendForAtomicRMWArg(unsigned Op) const {
+ // Zaamo will use amo<op>.w which does not require extension.
+ if (Subtarget.hasStdExtZaamo() || Subtarget.hasForcedAtomics())
+ return ISD::ANY_EXTEND;
+
+ // Zalrsc pseudo expansions with comparison require sign-extension.
+ assert(Subtarget.hasStdExtZalrsc());
+ switch (Op) {
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return ISD::SIGN_EXTEND;
+ default:
+ break;
+ }
+ return ISD::ANY_EXTEND;
+}
+
Register RISCVTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return RISCV::X10;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3f81ed7..9e3e2a9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -245,6 +245,7 @@ public:
}
ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
+ ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const override;
bool shouldTransformSignedTruncationCheck(EVT XVT,
unsigned KeptBits) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 571d72f..5c81a09 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -158,9 +158,9 @@ class seq_cst_store<PatFrag base>
}
} // IsAtomic = 1
-// Atomic load/store are available under both +a and +force-atomics.
-// Fences will be inserted for atomic load/stores according to the logic in
-// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+// Atomic load/store are available under +zalrsc (thus also +a) and
+// +force-atomics. Fences will be inserted for atomic load/stores according to
+// the logic in RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
// The normal loads/stores are relaxed (unordered) loads/stores that don't have
// any ordering. This is necessary because AtomicExpandPass has added fences to
// atomic load/stores and changed them to unordered ones.
@@ -308,7 +308,65 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering)>;
-let Predicates = [HasStdExtA] in {
+let Predicates = [HasStdExtZalrsc, NoStdExtZaamo] in {
+
+let Size = 16 in {
+def PseudoAtomicSwap32 : PseudoAMO;
+def PseudoAtomicLoadAdd32 : PseudoAMO;
+def PseudoAtomicLoadSub32 : PseudoAMO;
+def PseudoAtomicLoadAnd32 : PseudoAMO;
+def PseudoAtomicLoadOr32 : PseudoAMO;
+def PseudoAtomicLoadXor32 : PseudoAMO;
+} // Size = 16
+let Size = 24 in {
+def PseudoAtomicLoadMax32 : PseudoAMO;
+def PseudoAtomicLoadMin32 : PseudoAMO;
+def PseudoAtomicLoadUMax32 : PseudoAMO;
+def PseudoAtomicLoadUMin32 : PseudoAMO;
+} // Size = 24
+
+defm : PseudoAMOPat<"atomic_swap_i32", PseudoAtomicSwap32>;
+defm : PseudoAMOPat<"atomic_load_add_i32", PseudoAtomicLoadAdd32>;
+defm : PseudoAMOPat<"atomic_load_sub_i32", PseudoAtomicLoadSub32>;
+defm : PseudoAMOPat<"atomic_load_and_i32", PseudoAtomicLoadAnd32>;
+defm : PseudoAMOPat<"atomic_load_or_i32", PseudoAtomicLoadOr32>;
+defm : PseudoAMOPat<"atomic_load_xor_i32", PseudoAtomicLoadXor32>;
+defm : PseudoAMOPat<"atomic_load_max_i32", PseudoAtomicLoadMax32>;
+defm : PseudoAMOPat<"atomic_load_min_i32", PseudoAtomicLoadMin32>;
+defm : PseudoAMOPat<"atomic_load_umax_i32", PseudoAtomicLoadUMax32>;
+defm : PseudoAMOPat<"atomic_load_umin_i32", PseudoAtomicLoadUMin32>;
+} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo]
+
+let Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64] in {
+
+let Size = 16 in {
+def PseudoAtomicSwap64 : PseudoAMO;
+def PseudoAtomicLoadAdd64 : PseudoAMO;
+def PseudoAtomicLoadSub64 : PseudoAMO;
+def PseudoAtomicLoadAnd64 : PseudoAMO;
+def PseudoAtomicLoadOr64 : PseudoAMO;
+def PseudoAtomicLoadXor64 : PseudoAMO;
+} // Size = 16
+let Size = 24 in {
+def PseudoAtomicLoadMax64 : PseudoAMO;
+def PseudoAtomicLoadMin64 : PseudoAMO;
+def PseudoAtomicLoadUMax64 : PseudoAMO;
+def PseudoAtomicLoadUMin64 : PseudoAMO;
+} // Size = 24
+
+defm : PseudoAMOPat<"atomic_swap_i64", PseudoAtomicSwap64, i64>;
+defm : PseudoAMOPat<"atomic_load_add_i64", PseudoAtomicLoadAdd64, i64>;
+defm : PseudoAMOPat<"atomic_load_sub_i64", PseudoAtomicLoadSub64, i64>;
+defm : PseudoAMOPat<"atomic_load_and_i64", PseudoAtomicLoadAnd64, i64>;
+defm : PseudoAMOPat<"atomic_load_or_i64", PseudoAtomicLoadOr64, i64>;
+defm : PseudoAMOPat<"atomic_load_xor_i64", PseudoAtomicLoadXor64, i64>;
+defm : PseudoAMOPat<"atomic_load_max_i64", PseudoAtomicLoadMax64, i64>;
+defm : PseudoAMOPat<"atomic_load_min_i64", PseudoAtomicLoadMin64, i64>;
+defm : PseudoAMOPat<"atomic_load_umax_i64", PseudoAtomicLoadUMax64, i64>;
+defm : PseudoAMOPat<"atomic_load_umin_i64", PseudoAtomicLoadUMin64, i64>;
+} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64]
+
+let Predicates = [HasStdExtZalrsc] in {
let Size = 20 in
def PseudoAtomicLoadNand32 : PseudoAMO;
@@ -347,14 +405,14 @@ def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax,
PseudoMaskedAtomicLoadUMax32>;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin,
PseudoMaskedAtomicLoadUMin32>;
-} // Predicates = [HasStdExtA]
+} // Predicates = [HasStdExtZalrsc]
-let Predicates = [HasStdExtA, IsRV64] in {
+let Predicates = [HasStdExtZalrsc, IsRV64] in {
let Size = 20 in
def PseudoAtomicLoadNand64 : PseudoAMO;
defm : PseudoAMOPat<"atomic_load_nand_i64", PseudoAtomicLoadNand64, i64>;
-} // Predicates = [HasStdExtA, IsRV64]
+} // Predicates = [HasStdExtZalrsc, IsRV64]
/// Compare and exchange
@@ -385,17 +443,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
}
-let Predicates = [HasStdExtA, NoStdExtZacas] in {
+let Predicates = [HasStdExtZalrsc, NoStdExtZacas] in {
def PseudoCmpXchg32 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
}
-let Predicates = [HasStdExtA, NoStdExtZacas, IsRV64] in {
+let Predicates = [HasStdExtZalrsc, NoStdExtZacas, IsRV64] in {
def PseudoCmpXchg64 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_i64", PseudoCmpXchg64, i64>;
}
-let Predicates = [HasStdExtA] in {
+let Predicates = [HasStdExtZalrsc] in {
def PseudoMaskedCmpXchg32
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
@@ -412,4 +470,4 @@ def : Pat<(XLenVT (int_riscv_masked_cmpxchg
(XLenVT GPR:$mask), (XLenVT timm:$ordering))),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-} // Predicates = [HasStdExtA]
+} // Predicates = [HasStdExtZalrsc]
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 5591d9f..021353a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -355,9 +355,9 @@ private:
SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const;
bool extractSubvector(Register &ResVReg, const SPIRVType *ResType,
Register &ReadReg, MachineInstr &InsertionPoint) const;
- bool generateImageRead(Register &ResVReg, const SPIRVType *ResType,
- Register ImageReg, Register IdxReg, DebugLoc Loc,
- MachineInstr &Pos) const;
+ bool generateImageReadOrFetch(Register &ResVReg, const SPIRVType *ResType,
+ Register ImageReg, Register IdxReg,
+ DebugLoc Loc, MachineInstr &Pos) const;
bool BuildCOPY(Register DestReg, Register SrcReg, MachineInstr &I) const;
bool loadVec3BuiltinInputID(SPIRV::BuiltIn::BuiltIn BuiltInValue,
Register ResVReg, const SPIRVType *ResType,
@@ -1321,8 +1321,8 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
}
Register IdxReg = IntPtrDef->getOperand(3).getReg();
- return generateImageRead(ResVReg, ResType, NewHandleReg, IdxReg,
- I.getDebugLoc(), I);
+ return generateImageReadOrFetch(ResVReg, ResType, NewHandleReg, IdxReg,
+ I.getDebugLoc(), I);
}
}
@@ -3639,27 +3639,33 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic(
DebugLoc Loc = I.getDebugLoc();
MachineInstr &Pos = I;
- return generateImageRead(ResVReg, ResType, NewImageReg, IdxReg, Loc, Pos);
+ return generateImageReadOrFetch(ResVReg, ResType, NewImageReg, IdxReg, Loc,
+ Pos);
}
-bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg,
- const SPIRVType *ResType,
- Register ImageReg,
- Register IdxReg, DebugLoc Loc,
- MachineInstr &Pos) const {
+bool SPIRVInstructionSelector::generateImageReadOrFetch(
+ Register &ResVReg, const SPIRVType *ResType, Register ImageReg,
+ Register IdxReg, DebugLoc Loc, MachineInstr &Pos) const {
SPIRVType *ImageType = GR.getSPIRVTypeForVReg(ImageReg);
assert(ImageType && ImageType->getOpcode() == SPIRV::OpTypeImage &&
"ImageReg is not an image type.");
+
bool IsSignedInteger =
sampledTypeIsSignedInteger(GR.getTypeForSPIRVType(ImageType));
+ // Check if the "sampled" operand of the image type is 1.
+ // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpImageFetch
+ auto SampledOp = ImageType->getOperand(6);
+ bool IsFetch = (SampledOp.getImm() == 1);
uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
if (ResultSize == 4) {
- auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(ImageReg)
- .addUse(IdxReg);
+ auto BMI =
+ BuildMI(*Pos.getParent(), Pos, Loc,
+ TII.get(IsFetch ? SPIRV::OpImageFetch : SPIRV::OpImageRead))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(ImageReg)
+ .addUse(IdxReg);
if (IsSignedInteger)
BMI.addImm(0x1000); // SignExtend
@@ -3668,11 +3674,13 @@ bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg,
SPIRVType *ReadType = widenTypeToVec4(ResType, Pos);
Register ReadReg = MRI->createVirtualRegister(GR.getRegClass(ReadType));
- auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead))
- .addDef(ReadReg)
- .addUse(GR.getSPIRVTypeID(ReadType))
- .addUse(ImageReg)
- .addUse(IdxReg);
+ auto BMI =
+ BuildMI(*Pos.getParent(), Pos, Loc,
+ TII.get(IsFetch ? SPIRV::OpImageFetch : SPIRV::OpImageRead))
+ .addDef(ReadReg)
+ .addUse(GR.getSPIRVTypeID(ReadType))
+ .addUse(ImageReg)
+ .addUse(IdxReg);
if (IsSignedInteger)
BMI.addImm(0x1000); // SignExtend
bool Succeed = BMI.constrainAllUses(TII, TRI, RBI);
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp
index 7882045..0fce5b9 100644
--- a/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -567,8 +567,8 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT) {
default:
if (TT.isOSNetBSD())
return "apcs-gnu";
- if (TT.isOSFreeBSD() || TT.isOSOpenBSD() || TT.isOSHaiku() ||
- TT.isOHOSFamily())
+ if (TT.isOSFreeBSD() || TT.isOSFuchsia() || TT.isOSOpenBSD() ||
+ TT.isOSHaiku() || TT.isOHOSFamily())
return "aapcs-linux";
return "aapcs";
}
@@ -648,6 +648,8 @@ StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) {
}
case llvm::Triple::OpenBSD:
return "cortex-a8";
+ case llvm::Triple::Fuchsia:
+ return "cortex-a53";
default:
switch (Triple.getEnvironment()) {
case llvm::Triple::EABIHF:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 975498f..5aa8de3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3455,27 +3455,45 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
// select a, false, b -> select !a, b, false
if (match(TrueVal, m_Specific(Zero))) {
Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return SelectInst::Create(NotCond, FalseVal, Zero);
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ SelectInst::Create(NotCond, FalseVal, Zero, "", nullptr, MDFrom);
+ NewSI->swapProfMetadata();
+ return NewSI;
}
// select a, b, true -> select !a, true, b
if (match(FalseVal, m_Specific(One))) {
Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return SelectInst::Create(NotCond, One, TrueVal);
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ SelectInst::Create(NotCond, One, TrueVal, "", nullptr, MDFrom);
+ NewSI->swapProfMetadata();
+ return NewSI;
}
// DeMorgan in select form: !a && !b --> !(a || b)
// select !a, !b, false --> not (select a, true, b)
if (match(&SI, m_LogicalAnd(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&
(CondVal->hasOneUse() || TrueVal->hasOneUse()) &&
- !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))
- return BinaryOperator::CreateNot(Builder.CreateSelect(A, One, B));
+ !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) {
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ cast<SelectInst>(Builder.CreateSelect(A, One, B, "", MDFrom));
+ NewSI->swapProfMetadata();
+ return BinaryOperator::CreateNot(NewSI);
+ }
// DeMorgan in select form: !a || !b --> !(a && b)
// select !a, true, !b --> not (select a, b, false)
if (match(&SI, m_LogicalOr(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&
(CondVal->hasOneUse() || FalseVal->hasOneUse()) &&
- !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))
- return BinaryOperator::CreateNot(Builder.CreateSelect(A, B, Zero));
+ !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) {
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ cast<SelectInst>(Builder.CreateSelect(A, B, Zero, "", MDFrom));
+ NewSI->swapProfMetadata();
+ return BinaryOperator::CreateNot(NewSI);
+ }
// select (select a, true, b), true, b -> select a, true, b
if (match(CondVal, m_Select(m_Value(A), m_One(), m_Value(B))) &&
diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
index 40720ae..29968b8 100644
--- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -131,7 +132,7 @@ cl::opt<uint64_t> ClFallbackToken(
//===--- Statistics -------------------------------------------------------===//
-STATISTIC(NumFunctionsInstrumented, "Functions instrumented");
+STATISTIC(NumFunctionsModified, "Functions modified");
STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
//===----------------------------------------------------------------------===//
@@ -140,9 +141,19 @@ STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
///
/// Expected format is: !{<type-name>, <contains-pointer>}
MDNode *getAllocTokenMetadata(const CallBase &CB) {
- MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
- if (!Ret)
- return nullptr;
+ MDNode *Ret = nullptr;
+ if (auto *II = dyn_cast<IntrinsicInst>(&CB);
+ II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+ auto *MDV = cast<MetadataAsValue>(II->getArgOperand(0));
+ Ret = cast<MDNode>(MDV->getMetadata());
+ // If the intrinsic has an empty MDNode, type inference failed.
+ if (Ret->getNumOperands() == 0)
+ return nullptr;
+ } else {
+ Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
+ if (!Ret)
+ return nullptr;
+ }
assert(Ret->getNumOperands() == 2 && "bad !alloc_token");
assert(isa<MDString>(Ret->getOperand(0)));
assert(isa<ConstantAsMetadata>(Ret->getOperand(1)));
@@ -315,6 +326,9 @@ private:
FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID,
LibFunc OriginalFunc);
+ /// Lower alloc_token_* intrinsics.
+ void replaceIntrinsicInst(IntrinsicInst *II, OptimizationRemarkEmitter &ORE);
+
/// Return the token ID from metadata in the call.
uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode);
@@ -336,21 +350,32 @@ bool AllocToken::instrumentFunction(Function &F) {
// Do not apply any instrumentation for naked functions.
if (F.hasFnAttribute(Attribute::Naked))
return false;
- if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
- return false;
// Don't touch available_externally functions, their actual body is elsewhere.
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
return false;
- // Only instrument functions that have the sanitize_alloc_token attribute.
- if (!F.hasFnAttribute(Attribute::SanitizeAllocToken))
- return false;
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
+ SmallVector<IntrinsicInst *, 4> IntrinsicInsts;
+
+ // Only instrument functions that have the sanitize_alloc_token attribute.
+ const bool InstrumentFunction =
+ F.hasFnAttribute(Attribute::SanitizeAllocToken) &&
+ !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
// Collect all allocation calls to avoid iterator invalidation.
for (Instruction &I : instructions(F)) {
+ // Collect all alloc_token_* intrinsics.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I);
+ II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+ IntrinsicInsts.emplace_back(II);
+ continue;
+ }
+
+ if (!InstrumentFunction)
+ continue;
+
auto *CB = dyn_cast<CallBase>(&I);
if (!CB)
continue;
@@ -359,11 +384,21 @@ bool AllocToken::instrumentFunction(Function &F) {
}
bool Modified = false;
- for (auto &[CB, Func] : AllocCalls)
- Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
- if (Modified)
- NumFunctionsInstrumented++;
+ if (!AllocCalls.empty()) {
+ for (auto &[CB, Func] : AllocCalls)
+ Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
+ if (Modified)
+ NumFunctionsModified++;
+ }
+
+ if (!IntrinsicInsts.empty()) {
+ for (auto *II : IntrinsicInsts)
+ replaceIntrinsicInst(II, ORE);
+ Modified = true;
+ NumFunctionsModified++;
+ }
+
return Modified;
}
@@ -381,7 +416,7 @@ AllocToken::shouldInstrumentCall(const CallBase &CB,
if (TLI.getLibFunc(*Callee, Func)) {
if (isInstrumentableLibFunc(Func, CB, TLI))
return Func;
- } else if (Options.Extended && getAllocTokenMetadata(CB)) {
+ } else if (Options.Extended && CB.getMetadata(LLVMContext::MD_alloc_token)) {
return NotLibFunc;
}
@@ -528,6 +563,16 @@ FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB,
return TokenAlloc;
}
+void AllocToken::replaceIntrinsicInst(IntrinsicInst *II,
+ OptimizationRemarkEmitter &ORE) {
+ assert(II->getIntrinsicID() == Intrinsic::alloc_token_id);
+
+ uint64_t TokenID = getToken(*II, ORE);
+ Value *V = ConstantInt::get(IntPtrTy, TokenID);
+ II->replaceAllUsesWith(V);
+ II->eraseFromParent();
+}
+
} // namespace
AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index b4e4dc2..c95c887 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -217,32 +217,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
return Parent->getEnclosingBlockWithPredecessors();
}
-bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
- const VPDominatorTree &VPDT) {
- auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
- if (!VPBB)
- return false;
-
- // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
- // VPBB as its entry, i.e., free of predecessors.
- if (auto *R = VPBB->getParent())
- return !R->isReplicator() && !VPBB->hasPredecessors();
-
- // A header dominates its second predecessor (the latch), with the other
- // predecessor being the preheader
- return VPB->getPredecessors().size() == 2 &&
- VPDT.dominates(VPB, VPB->getPredecessors()[1]);
-}
-
-bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
- const VPDominatorTree &VPDT) {
- // A latch has a header as its second successor, with its other successor
- // leaving the loop. A preheader OTOH has a header as its first (and only)
- // successor.
- return VPB->getNumSuccessors() == 2 &&
- VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
-}
-
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
iterator It = begin();
while (It != end() && It->isPhi())
@@ -768,8 +742,12 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
VPRegionBlock *VPRegionBlock::clone() {
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
- auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
- getName(), isReplicator());
+ VPlan &Plan = *getPlan();
+ VPRegionBlock *NewRegion =
+ isReplicator()
+ ? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
+ : Plan.createLoopRegion(getName(), NewEntry, NewExiting);
+
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);
return NewRegion;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8274431..167ba55 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4450,22 +4450,24 @@ public:
return VPB;
}
- /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
- /// IsReplicator is true, the region is a replicate region. The returned block
- /// is owned by the VPlan and deleted once the VPlan is destroyed.
- VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
- const std::string &Name = "",
- bool IsReplicator = false) {
- auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
+ /// Create a new loop region with \p Name and entry and exiting blocks set
+ /// to \p Entry and \p Exiting respectively, if set. The returned block is
+ /// owned by the VPlan and deleted once the VPlan is destroyed.
+ VPRegionBlock *createLoopRegion(const std::string &Name = "",
+ VPBlockBase *Entry = nullptr,
+ VPBlockBase *Exiting = nullptr) {
+ auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
+ : new VPRegionBlock(Name);
CreatedBlocks.push_back(VPB);
return VPB;
}
- /// Create a new loop VPRegionBlock with \p Name and entry and exiting blocks set
- /// to nullptr. The returned block is owned by the VPlan and deleted once the
- /// VPlan is destroyed.
- VPRegionBlock *createVPRegionBlock(const std::string &Name = "") {
- auto *VPB = new VPRegionBlock(Name);
+ /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
+ /// returned block is owned by the VPlan and deleted once the VPlan is
+ /// destroyed.
+ VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting,
+ const std::string &Name = "") {
+ auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
CreatedBlocks.push_back(VPB);
return VPB;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 332791a..65688a3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -406,7 +406,7 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
// LatchExitVPB, taking care to preserve the original predecessor & successor
// order of blocks. Set region entry and exiting after both HeaderVPB and
// LatchVPBB have been disconnected from their predecessors/successors.
- auto *R = Plan.createVPRegionBlock();
+ auto *R = Plan.createLoopRegion();
VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
VPBlockUtils::disconnectBlocks(LatchVPBB, R);
VPBlockUtils::connectBlocks(PreheaderVPBB, R);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7bf8d83..ff25ef5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -372,7 +372,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
auto *Exiting =
Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
VPRegionBlock *Region =
- Plan.createVPRegionBlock(Entry, Exiting, RegionName, true);
+ Plan.createReplicateRegion(Entry, Exiting, RegionName);
// Note: first set Entry as region entry and then connect successors starting
// from it in order, to propagate the "parent" of each VPBasicBlock.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 10801c0..32e4b88 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -8,6 +8,7 @@
#include "VPlanUtils.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -253,3 +254,29 @@ vputils::getRecipesForUncountableExit(VPlan &Plan,
return UncountableCondition;
}
+
+bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
+ const VPDominatorTree &VPDT) {
+ auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
+ if (!VPBB)
+ return false;
+
+ // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
+ // VPBB as its entry, i.e., free of predecessors.
+ if (auto *R = VPBB->getParent())
+ return !R->isReplicator() && !VPBB->hasPredecessors();
+
+ // A header dominates its second predecessor (the latch), with the other
+ // predecessor being the preheader
+ return VPB->getPredecessors().size() == 2 &&
+ VPDT.dominates(VPB, VPB->getPredecessors()[1]);
+}
+
+bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
+ const VPDominatorTree &VPDT) {
+ // A latch has a header as its second successor, with its other successor
+ // leaving the loop. A preheader OTOH has a header as its first (and only)
+ // successor.
+ return VPB->getNumSuccessors() == 2 &&
+ VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
+}
diff --git a/llvm/test/CodeGen/AMDGPU/add-max.ll b/llvm/test/CodeGen/AMDGPU/add-max.ll
index 00c6656..b3a7057 100644
--- a/llvm/test/CodeGen/AMDGPU/add-max.ll
+++ b/llvm/test/CodeGen/AMDGPU/add-max.ll
@@ -5,7 +5,7 @@
define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -16,7 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_svv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1
+; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -27,7 +27,7 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
; SDAG-LABEL: add_max_u32_ssv:
; SDAG: ; %bb.0:
-; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0
+; SDAG-NEXT: v_add_max_u32 v0, s0, s1, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_u32_ssv:
@@ -59,7 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c
define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
; GCN-LABEL: add_max_u32_vsi:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4
+; GCN-NEXT: v_add_max_u32 v0, v0, s0, 4
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 4)
@@ -70,7 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
; GCN-LABEL: add_max_u32_svl:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64
+; GCN-NEXT: v_add_max_u32 v0, s0, v0, 0x64
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 100)
@@ -81,7 +81,7 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
; SDAG-LABEL: add_max_u32_slv:
; SDAG: ; %bb.0:
-; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0
+; SDAG-NEXT: v_add_max_u32 v0, 0x64, s0, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_u32_slv:
@@ -99,7 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smax.i32(i32 %add, i32 %c)
@@ -110,7 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umin.i32(i32 %add, i32 %c)
@@ -121,7 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smin.i32(i32 %add, i32 %c)
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 7ee0015f..711d57b 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -39137,7 +39137,7 @@ define bfloat @v_sitofp_i64_to_bf16(i64 %x) {
; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v2
; GFX1250-NEXT: v_add_nc_u32_e32 v2, 32, v2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_min_u32_e64 v2, v3, -1, v2
+; GFX1250-NEXT: v_add_min_u32 v2, v3, -1, v2
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v2, v[0:1]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
@@ -39487,8 +39487,8 @@ define <2 x bfloat> @v_sitofp_v2i64_to_v2bf16(<2 x i64> %x) {
; GFX1250-NEXT: v_dual_ashrrev_i32 v5, 31, v5 :: v_dual_ashrrev_i32 v4, 31, v4
; GFX1250-NEXT: v_dual_add_nc_u32 v5, 32, v5 :: v_dual_add_nc_u32 v4, 32, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_min_u32_e64 v5, v7, -1, v5
-; GFX1250-NEXT: v_add_min_u32_e64 v4, v6, -1, v4
+; GFX1250-NEXT: v_add_min_u32 v5, v7, -1, v5
+; GFX1250-NEXT: v_add_min_u32 v4, v6, -1, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v5, v[0:1]
; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v4, v[2:3]
@@ -39979,9 +39979,9 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250TRUE16-NEXT: v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v8, 31, v8
-; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v7, v10, -1, v7
+; GFX1250TRUE16-NEXT: v_add_min_u32 v7, v10, -1, v7
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v6, v9, -1, v6
+; GFX1250TRUE16-NEXT: v_add_min_u32 v6, v9, -1, v6
; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
@@ -39991,7 +39991,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4
; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v8, v11, -1, v8
+; GFX1250TRUE16-NEXT: v_add_min_u32 v8, v11, -1, v8
; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
@@ -40027,8 +40027,8 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
; GFX1250FAKE16-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v6, v10, -1, v6
-; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v7, v11, -1, v7
+; GFX1250FAKE16-NEXT: v_add_min_u32 v6, v10, -1, v6
+; GFX1250FAKE16-NEXT: v_add_min_u32 v7, v11, -1, v7
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
@@ -40038,7 +40038,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250FAKE16-NEXT: v_or_b32_e32 v2, v3, v2
-; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v8, v9, -1, v8
+; GFX1250FAKE16-NEXT: v_add_min_u32 v8, v9, -1, v8
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
@@ -40656,18 +40656,18 @@ define <4 x bfloat> @v_sitofp_v4i64_to_v4bf16(<4 x i64> %x) {
; GFX1250-NEXT: v_dual_add_nc_u32 v9, 32, v9 :: v_dual_add_nc_u32 v8, 32, v8
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_dual_ashrrev_i32 v10, 31, v10 :: v_dual_bitop2_b32 v11, v0, v1 bitop3:0x14
-; GFX1250-NEXT: v_add_min_u32_e64 v9, v13, -1, v9
+; GFX1250-NEXT: v_add_min_u32 v9, v13, -1, v9
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_add_min_u32_e64 v8, v12, -1, v8
+; GFX1250-NEXT: v_add_min_u32 v8, v12, -1, v8
; GFX1250-NEXT: v_dual_ashrrev_i32 v11, 31, v11 :: v_dual_add_nc_u32 v10, 32, v10
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v9, v[4:5]
; GFX1250-NEXT: v_lshlrev_b64_e32 v[6:7], v8, v[6:7]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_add_nc_u32_e32 v11, 32, v11
-; GFX1250-NEXT: v_add_min_u32_e64 v10, v14, -1, v10
+; GFX1250-NEXT: v_add_min_u32 v10, v14, -1, v10
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_min_u32_e64 v11, v15, -1, v11
+; GFX1250-NEXT: v_add_min_u32 v11, v15, -1, v11
; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v10, v[2:3]
; GFX1250-NEXT: v_min_u32_e32 v6, 1, v6
; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
diff --git a/llvm/test/CodeGen/ARM/llround-conv.ll b/llvm/test/CodeGen/ARM/llround-conv.ll
index f734db8..20fe272 100644
--- a/llvm/test/CodeGen/ARM/llround-conv.ll
+++ b/llvm/test/CodeGen/ARM/llround-conv.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
define i64 @testmsxh_builtin(half %x) {
@@ -22,6 +23,14 @@ define i64 @testmsxh_builtin(half %x) {
; CHECK-NOFP16-NEXT: bl llroundf
; CHECK-NOFP16-NEXT: pop {r11, pc}
;
+; CHECK-FPv8-LABEL: testmsxh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: .save {r11, lr}
+; CHECK-FPv8-NEXT: push {r11, lr}
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: bl llroundf
+; CHECK-FPv8-NEXT: pop {r11, pc}
+;
; CHECK-FP16-LABEL: testmsxh_builtin:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r11, lr}
diff --git a/llvm/test/CodeGen/ARM/lround-conv.ll b/llvm/test/CodeGen/ARM/lround-conv.ll
index 03f7a0d..7466bcb 100644
--- a/llvm/test/CodeGen/ARM/lround-conv.ll
+++ b/llvm/test/CodeGen/ARM/lround-conv.ll
@@ -4,11 +4,39 @@
; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
-;define i32 @testmswh_builtin(half %x) {
-;entry:
-; %0 = tail call i32 @llvm.lround.i32.f16(half %x)
-; ret i32 %0
-;}
+define i32 @testmswh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmswh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: pop {r11, lr}
+; CHECK-SOFT-NEXT: b lroundf
+;
+; CHECK-NOFP16-LABEL: testmswh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: pop {r11, lr}
+; CHECK-NOFP16-NEXT: b lroundf
+;
+; CHECK-FPv8-LABEL: testmswh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: b lroundf
+;
+; CHECK-FP16-LABEL: testmswh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vcvta.s32.f16 s0, s0
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %0 = tail call i32 @llvm.lround.i32.f16(half %x)
+ ret i32 %0
+}
define i32 @testmsws_builtin(float %x) {
; CHECK-LABEL: testmsws_builtin:
@@ -40,8 +68,3 @@ entry:
ret i32 %0
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
-; CHECK-FPv8: {{.*}}
-; CHECK-NOFP16: {{.*}}
-; CHECK-SOFT: {{.*}}
diff --git a/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll
new file mode 100644
index 0000000..9190d03
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -dxil-prepare < %s | FileCheck %s
+
+; Ensures that dxil-prepare will remove the llvm.errno.tbaa metadata
+
+target triple = "dxil-unknown-shadermodel6.0-compute"
+
+define void @main() {
+entry:
+ ret void
+}
+
+; CHECK-NOT: !llvm.errno.tbaa
+; CHECK-NOT: {{^!}}
+
+!llvm.errno.tbaa = !{!0}
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll
new file mode 100644
index 0000000..f7e5cdb
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll
@@ -0,0 +1,33 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+; CHECK-DAG: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = combine(##.LCPI0_0,#-1)
+; CHECK-DAG: [[VREG1:v([0-9]+)]] = vmem(r[[REGH]]+#0)
+; CHECK-DAG: [[REG1:(r[0-9]+)]] = memw(r{{[0-9]+}}+#4)
+; CHECK-DAG: [[VREG2:v([0-9]+)]] = vsplat([[REG1]])
+; CHECK-DAG: [[REG2:(r[0-9]+)]] = memw(r{{[0-9]+}}+#0)
+; CHECK-DAG: [[VREG3:v([0-9]+)]] = vsplat([[REG2]])
+; CHECK-DAG: [[VREG4:v([0-9]+)]] = vand([[VREG2]],[[VREG1]])
+; CHECK-DAG: [[VREG5:v([0-9]+)]] = vand([[VREG3]],[[VREG1]])
+; CHECK-DAG: [[QREG:q[0-9]+]] = vand([[VREG4]],r{{[0-9]+}})
+; CHECK-DAG: [[VREG6:v([0-9]+)]] = vand([[QREG]],r{{[0-9]+}})
+; CHECK-DAG: [[QREG1:q[0-9]+]] = vand([[VREG5]],r{{[0-9]+}})
+; CHECK-DAG: [[VREG7:v([0-9]+)]] = vand([[QREG1]],r{{[0-9]+}})
+; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
+; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
+; CHECK-DAG: [[VREG8:v([0-9]+)]] = vror(v{{[0-9]+}},r{{[0-9]+}})
+; CHECK-DAG: [[VREG9:v([0-9]+)]] = vor([[VREG8]],v{{[0-9]+}})
+; CHECK-DAG: q{{[0-9]+}} = vand([[VREG9]],r{{[0-9]+}})
+define void @bitcast_i64_to_v64i1_full(ptr %in, ptr %out) {
+entry:
+ %load = load i64, ptr %in, align 4
+ %bitcast = bitcast i64 %load to <64 x i1>
+ %e0 = extractelement <64 x i1> %bitcast, i32 0
+ %e1 = extractelement <64 x i1> %bitcast, i32 1
+ %z0 = zext i1 %e0 to i8
+ %z1 = zext i1 %e1 to i8
+ %ptr0 = getelementptr i8, ptr %out, i32 0
+ %ptr1 = getelementptr i8, ptr %out, i32 1
+ store i8 %z0, ptr %ptr0, align 1
+ store i8 %z1, ptr %ptr1, align 1
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json
new file mode 100644
index 0000000..5de715b
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json
@@ -0,0 +1,22 @@
+{
+ "entities": {
+ "KILL": [0.1, 0.2, 0.3],
+ "MOV": [0.4, 0.5, 0.6],
+ "LEA": [0.7, 0.8, 0.9],
+ "RET": [1.0, 1.1, 1.2],
+ "ADD": [1.3, 1.4, 1.5],
+ "SUB": [1.6, 1.7, 1.8],
+ "IMUL": [1.9, 2.0, 2.1],
+ "AND": [2.2, 2.3, 2.4],
+ "OR": [2.5, 2.6, 2.7],
+ "XOR": [2.8, 2.9, 3.0],
+ "CMP": [3.1, 3.2, 3.3],
+ "TEST": [3.4, 3.5, 3.6],
+ "JMP": [3.7, 3.8, 3.9],
+ "CALL": [4.0, 4.1, 4.2],
+ "PUSH": [4.3, 4.4, 4.5],
+ "POP": [4.6, 4.7, 4.8],
+ "NOP": [4.9, 5.0, 5.1],
+ "COPY": [5.2, 5.3, 5.4]
+ }
+} \ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/if-else.mir b/llvm/test/CodeGen/MIR2Vec/if-else.mir
new file mode 100644
index 0000000..5734a23
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/if-else.mir
@@ -0,0 +1,144 @@
+# REQUIRES: x86-registered-target
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=none -print-mir2vec -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_3D_vocab.json %s -o /dev/null 2>&1 | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+ define dso_local i32 @abc(i32 noundef %a, i32 noundef %b) {
+ entry:
+ %retval = alloca i32, align 4
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca i32, align 4
+ store i32 %a, ptr %a.addr, align 4
+ store i32 %b, ptr %b.addr, align 4
+ %0 = load i32, ptr %a.addr, align 4
+ %1 = load i32, ptr %b.addr, align 4
+ %cmp = icmp sgt i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.else
+
+ if.then: ; preds = %entry
+ %2 = load i32, ptr %b.addr, align 4
+ store i32 %2, ptr %retval, align 4
+ br label %return
+
+ if.else: ; preds = %entry
+ %3 = load i32, ptr %a.addr, align 4
+ store i32 %3, ptr %retval, align 4
+ br label %return
+
+ return: ; preds = %if.else, %if.then
+ %4 = load i32, ptr %retval, align 4
+ ret i32 %4
+ }
+...
+---
+name: abc
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: true
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: gr32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: gr32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: gr32, preferred-register: '', flags: [ ] }
+ - { id: 3, class: gr32, preferred-register: '', flags: [ ] }
+ - { id: 4, class: gr32, preferred-register: '', flags: [ ] }
+ - { id: 5, class: gr32, preferred-register: '', flags: [ ] }
+liveins:
+ - { reg: '$edi', virtual-reg: '%0' }
+ - { reg: '$esi', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack:
+ - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: a.addr, type: default, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: b.addr, type: default, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ amxProgModel: None
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $edi, $esi
+
+ %1:gr32 = COPY $esi
+ %0:gr32 = COPY $edi
+ MOV32mr %stack.1.a.addr, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.a.addr)
+ MOV32mr %stack.2.b.addr, 1, $noreg, 0, $noreg, %1 :: (store (s32) into %ir.b.addr)
+ %2:gr32 = SUB32rr %0, %1, implicit-def $eflags
+ JCC_1 %bb.2, 14, implicit $eflags
+ JMP_1 %bb.1
+
+ bb.1.if.then:
+ successors: %bb.3(0x80000000)
+
+ %4:gr32 = MOV32rm %stack.2.b.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.b.addr)
+ MOV32mr %stack.0.retval, 1, $noreg, 0, $noreg, killed %4 :: (store (s32) into %ir.retval)
+ JMP_1 %bb.3
+
+ bb.2.if.else:
+ successors: %bb.3(0x80000000)
+
+ %3:gr32 = MOV32rm %stack.1.a.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.a.addr)
+ MOV32mr %stack.0.retval, 1, $noreg, 0, $noreg, killed %3 :: (store (s32) into %ir.retval)
+
+ bb.3.return:
+ %5:gr32 = MOV32rm %stack.0.retval, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.retval)
+ $eax = COPY %5
+ RET 0, $eax
+...
+
+# CHECK: Machine basic block vectors:
+# CHECK-NEXT: Machine basic block: abc:entry:
+# CHECK-NEXT: [ 16.50 17.10 17.70 ]
+# CHECK-NEXT: Machine basic block: abc:if.then:
+# CHECK-NEXT: [ 4.50 4.80 5.10 ]
+# CHECK-NEXT: Machine basic block: abc:if.else:
+# CHECK-NEXT: [ 0.80 1.00 1.20 ]
+# CHECK-NEXT: Machine basic block: abc:return:
+# CHECK-NEXT: [ 6.60 6.90 7.20 ] \ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir b/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir
new file mode 100644
index 0000000..338cb63
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir
@@ -0,0 +1,76 @@
+# REQUIRES: x86-registered-target
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=none -print-mir2vec -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_3D_vocab.json %s -o /dev/null 2>&1 | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+ define dso_local noundef i32 @add_function(i32 noundef %a, i32 noundef %b) {
+ entry:
+ %sum = add nsw i32 %a, %b
+ %result = mul nsw i32 %sum, 2
+ ret i32 %result
+ }
+
+ define dso_local void @simple_function() {
+ entry:
+ ret void
+ }
+...
+---
+name: add_function
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+ - { id: 1, class: gr32 }
+ - { id: 2, class: gr32 }
+ - { id: 3, class: gr32 }
+liveins:
+ - { reg: '$edi', virtual-reg: '%0' }
+ - { reg: '$esi', virtual-reg: '%1' }
+body: |
+ bb.0.entry:
+ liveins: $edi, $esi
+
+ %1:gr32 = COPY $esi
+ %0:gr32 = COPY $edi
+ %2:gr32 = nsw ADD32rr %0, %1, implicit-def dead $eflags
+ %3:gr32 = ADD32rr %2, %2, implicit-def dead $eflags
+ $eax = COPY %3
+ RET 0, $eax
+
+---
+name: simple_function
+alignment: 16
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ RET 0
+
+# CHECK: MIR2Vec embeddings for machine function add_function:
+# CHECK: Function vector: [ 19.20 19.80 20.40 ]
+# CHECK-NEXT: Machine basic block vectors:
+# CHECK-NEXT: Machine basic block: add_function:entry:
+# CHECK-NEXT: [ 19.20 19.80 20.40 ]
+# CHECK-NEXT: Machine instruction vectors:
+# CHECK-NEXT: Machine instruction: %1:gr32 = COPY $esi
+# CHECK-NEXT: [ 5.20 5.30 5.40 ]
+# CHECK-NEXT: Machine instruction: %0:gr32 = COPY $edi
+# CHECK-NEXT: [ 5.20 5.30 5.40 ]
+# CHECK-NEXT: Machine instruction: %2:gr32 = nsw ADD32rr %0:gr32(tied-def 0), %1:gr32, implicit-def dead $eflags
+# CHECK-NEXT: [ 1.30 1.40 1.50 ]
+# CHECK-NEXT: Machine instruction: %3:gr32 = ADD32rr %2:gr32(tied-def 0), %2:gr32, implicit-def dead $eflags
+# CHECK-NEXT: [ 1.30 1.40 1.50 ]
+# CHECK-NEXT: Machine instruction: $eax = COPY %3:gr32
+# CHECK-NEXT: [ 5.20 5.30 5.40 ]
+# CHECK-NEXT: Machine instruction: RET 0, $eax
+# CHECK-NEXT: [ 1.00 1.10 1.20 ]
+
+# CHECK: MIR2Vec embeddings for machine function simple_function:
+# CHECK-NEXT:Function vector: [ 1.00 1.10 1.20 ]
+# CHECK-NEXT: Machine basic block vectors:
+# CHECK-NEXT: Machine basic block: simple_function:entry:
+# CHECK-NEXT: [ 1.00 1.10 1.20 ]
+# CHECK-NEXT: Machine instruction vectors:
+# CHECK-NEXT: Machine instruction: RET 0
+# CHECK-NEXT: [ 1.00 1.10 1.20 ] \ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll
index 80b4048..c6554bc 100644
--- a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll
+++ b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll
@@ -1,8 +1,8 @@
-; REQUIRES: x86_64-linux
-; RUN: llc -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID
-; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM
-; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES
-; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS
+; REQUIRES: x86-registered-target
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS
define dso_local void @test() {
entry:
diff --git a/llvm/test/CodeGen/RISCV/atomic-fence.ll b/llvm/test/CodeGen/RISCV/atomic-fence.ll
index 7103345..77148f6 100644
--- a/llvm/test/CodeGen/RISCV/atomic-fence.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-fence.ll
@@ -1,12 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck --check-prefixes=CHECK,WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefixes=CHECK,WMO %s
; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck --check-prefixes=CHECK,WMO %s
; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
; RUN: | FileCheck --check-prefixes=CHECK,TSO %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck --check-prefixes=CHECK,WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefixes=CHECK,WMO %s
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck --check-prefixes=CHECK,WMO %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll
index 7e3abc7..c6234de 100644
--- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll
@@ -1,12 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I-ZALRSC %s
; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-ZALRSC %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
@@ -44,6 +48,11 @@ define i8 @atomic_load_i8_unordered(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i8_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i8_unordered:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lb a0, 0(a0)
@@ -59,6 +68,11 @@ define i8 @atomic_load_i8_unordered(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i8_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_load_i8_unordered:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lb a0, 0(a0)
@@ -78,6 +92,11 @@ define i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i8_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lb a0, 0(a0)
@@ -93,6 +112,11 @@ define i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_load_i8_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lb a0, 0(a0)
@@ -112,6 +136,12 @@ define i8 @atomic_load_i8_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV32I-ZALRSC-NEXT: fence r, rw
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: lb a0, 0(a0)
@@ -133,6 +163,12 @@ define i8 @atomic_load_i8_acquire(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV64I-ZALRSC-NEXT: fence r, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: lb a0, 0(a0)
@@ -200,6 +236,13 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: fence rw, rw
+; RV32I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV32I-ZALRSC-NEXT: fence r, rw
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, rw
@@ -223,6 +266,13 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, rw
+; RV64I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV64I-ZALRSC-NEXT: fence r, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
@@ -286,6 +336,11 @@ define i16 @atomic_load_i16_unordered(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i16_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i16_unordered:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lh a0, 0(a0)
@@ -301,6 +356,11 @@ define i16 @atomic_load_i16_unordered(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i16_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_load_i16_unordered:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lh a0, 0(a0)
@@ -320,6 +380,11 @@ define i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i16_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lh a0, 0(a0)
@@ -335,6 +400,11 @@ define i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_load_i16_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lh a0, 0(a0)
@@ -354,6 +424,12 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV32I-ZALRSC-NEXT: fence r, rw
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_load_i16_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: lh a0, 0(a0)
@@ -375,6 +451,12 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV64I-ZALRSC-NEXT: fence r, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_load_i16_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: lh a0, 0(a0)
@@ -442,6 +524,13 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: fence rw, rw
+; RV32I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV32I-ZALRSC-NEXT: fence r, rw
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, rw
@@ -465,6 +554,13 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, rw
+; RV64I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV64I-ZALRSC-NEXT: fence r, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
@@ -528,6 +624,11 @@ define i32 @atomic_load_i32_unordered(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i32_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i32_unordered:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lw a0, 0(a0)
@@ -543,6 +644,11 @@ define i32 @atomic_load_i32_unordered(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i32_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_load_i32_unordered:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lw a0, 0(a0)
@@ -562,6 +668,11 @@ define i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lw a0, 0(a0)
@@ -577,6 +688,11 @@ define i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_load_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lw a0, 0(a0)
@@ -596,6 +712,12 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV32I-ZALRSC-NEXT: fence r, rw
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_load_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: lw a0, 0(a0)
@@ -617,6 +739,12 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV64I-ZALRSC-NEXT: fence r, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_load_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: lw a0, 0(a0)
@@ -684,6 +812,13 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: fence rw, rw
+; RV32I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV32I-ZALRSC-NEXT: fence r, rw
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_load_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, rw
@@ -707,6 +842,13 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, rw
+; RV64I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV64I-ZALRSC-NEXT: fence r, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
@@ -770,6 +912,16 @@ define i64 @atomic_load_i64_unordered(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i64_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a1, 0
+; RV32I-ZALRSC-NEXT: call __atomic_load_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i64_unordered:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -790,6 +942,11 @@ define i64 @atomic_load_i64_unordered(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i64_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: ld a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_load_i64_unordered:
; RV64IA: # %bb.0:
; RV64IA-NEXT: ld a0, 0(a0)
@@ -809,6 +966,16 @@ define i64 @atomic_load_i64_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a1, 0
+; RV32I-ZALRSC-NEXT: call __atomic_load_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -829,6 +996,11 @@ define i64 @atomic_load_i64_monotonic(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: ld a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_load_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: ld a0, 0(a0)
@@ -848,6 +1020,16 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a1, 2
+; RV32I-ZALRSC-NEXT: call __atomic_load_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -868,6 +1050,12 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: ld a0, 0(a0)
+; RV64I-ZALRSC-NEXT: fence r, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_load_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: ld a0, 0(a0)
@@ -914,6 +1102,16 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a1, 5
+; RV32I-ZALRSC-NEXT: call __atomic_load_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_load_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -934,6 +1132,13 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_load_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, rw
+; RV64I-ZALRSC-NEXT: ld a0, 0(a0)
+; RV64I-ZALRSC-NEXT: fence r, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
@@ -979,6 +1184,11 @@ define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i8_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: sb a1, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i8_unordered:
; RV32IA: # %bb.0:
; RV32IA-NEXT: sb a1, 0(a0)
@@ -994,6 +1204,11 @@ define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i8_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sb a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_store_i8_unordered:
; RV64IA: # %bb.0:
; RV64IA-NEXT: sb a1, 0(a0)
@@ -1013,6 +1228,11 @@ define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: sb a1, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i8_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: sb a1, 0(a0)
@@ -1028,6 +1248,11 @@ define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sb a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_store_i8_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: sb a1, 0(a0)
@@ -1047,6 +1272,12 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: fence rw, w
+; RV32I-ZALRSC-NEXT: sb a1, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_store_i8_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, w
@@ -1068,6 +1299,12 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, w
+; RV64I-ZALRSC-NEXT: sb a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_store_i8_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, w
@@ -1135,6 +1372,13 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: fence rw, w
+; RV32I-ZALRSC-NEXT: sb a1, 0(a0)
+; RV32I-ZALRSC-NEXT: fence rw, rw
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_store_i8_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, w
@@ -1157,6 +1401,13 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, w
+; RV64I-ZALRSC-NEXT: sb a1, 0(a0)
+; RV64I-ZALRSC-NEXT: fence rw, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_store_i8_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, w
@@ -1219,6 +1470,11 @@ define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i16_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: sh a1, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i16_unordered:
; RV32IA: # %bb.0:
; RV32IA-NEXT: sh a1, 0(a0)
@@ -1234,6 +1490,11 @@ define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i16_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sh a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_store_i16_unordered:
; RV64IA: # %bb.0:
; RV64IA-NEXT: sh a1, 0(a0)
@@ -1253,6 +1514,11 @@ define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: sh a1, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i16_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: sh a1, 0(a0)
@@ -1268,6 +1534,11 @@ define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sh a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_store_i16_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: sh a1, 0(a0)
@@ -1287,6 +1558,12 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: fence rw, w
+; RV32I-ZALRSC-NEXT: sh a1, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_store_i16_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, w
@@ -1308,6 +1585,12 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, w
+; RV64I-ZALRSC-NEXT: sh a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_store_i16_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, w
@@ -1375,6 +1658,13 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: fence rw, w
+; RV32I-ZALRSC-NEXT: sh a1, 0(a0)
+; RV32I-ZALRSC-NEXT: fence rw, rw
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_store_i16_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, w
@@ -1397,6 +1687,13 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, w
+; RV64I-ZALRSC-NEXT: sh a1, 0(a0)
+; RV64I-ZALRSC-NEXT: fence rw, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_store_i16_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, w
@@ -1459,6 +1756,11 @@ define void @atomic_store_i32_unordered(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i32_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: sw a1, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i32_unordered:
; RV32IA: # %bb.0:
; RV32IA-NEXT: sw a1, 0(a0)
@@ -1474,6 +1776,11 @@ define void @atomic_store_i32_unordered(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i32_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_store_i32_unordered:
; RV64IA: # %bb.0:
; RV64IA-NEXT: sw a1, 0(a0)
@@ -1493,6 +1800,11 @@ define void @atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: sw a1, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: sw a1, 0(a0)
@@ -1508,6 +1820,11 @@ define void @atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_store_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: sw a1, 0(a0)
@@ -1527,6 +1844,12 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: fence rw, w
+; RV32I-ZALRSC-NEXT: sw a1, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_store_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, w
@@ -1548,6 +1871,12 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, w
+; RV64I-ZALRSC-NEXT: sw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_store_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, w
@@ -1615,6 +1944,13 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: fence rw, w
+; RV32I-ZALRSC-NEXT: sw a1, 0(a0)
+; RV32I-ZALRSC-NEXT: fence rw, rw
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomic_store_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, w
@@ -1637,6 +1973,13 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, w
+; RV64I-ZALRSC-NEXT: sw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: fence rw, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_store_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, w
@@ -1699,6 +2042,16 @@ define void @atomic_store_i64_unordered(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i64_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_store_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i64_unordered:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -1719,6 +2072,11 @@ define void @atomic_store_i64_unordered(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i64_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sd a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_store_i64_unordered:
; RV64IA: # %bb.0:
; RV64IA-NEXT: sd a1, 0(a0)
@@ -1738,6 +2096,16 @@ define void @atomic_store_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_store_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -1758,6 +2126,11 @@ define void @atomic_store_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sd a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomic_store_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: sd a1, 0(a0)
@@ -1777,6 +2150,16 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_store_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -1797,6 +2180,12 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, w
+; RV64I-ZALRSC-NEXT: sd a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_store_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, w
@@ -1843,6 +2232,16 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_store_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_store_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomic_store_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -1863,6 +2262,13 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomic_store_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: fence rw, w
+; RV64I-ZALRSC-NEXT: sd a1, 0(a0)
+; RV64I-ZALRSC-NEXT: fence rw, rw
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomic_store_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, w
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
index 4dafd6a..d5238ab 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
@@ -3,10 +3,14 @@
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32IA %s
+; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA %s
+; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s
define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind {
; RV32I-LABEL: atomicrmw_sub_i32_constant:
@@ -26,6 +30,18 @@ define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind {
; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_constant:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a1, (a0)
+; RV32I-ZALRSC-NEXT: sub a3, a1, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB0_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a1
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_sub_i32_constant:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -42,6 +58,18 @@ define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind {
; RV64IA-NEXT: li a1, -1
; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_constant:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a1, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB0_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw sub ptr %a, i32 1 seq_cst
ret i32 %1
}
@@ -71,6 +99,18 @@ define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind {
; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_constant:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a1, 1
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: li a2, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_sub_i64_constant:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -87,6 +127,18 @@ define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind {
; RV64IA-NEXT: li a1, -1
; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_constant:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a1, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a1, a2
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB1_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw sub ptr %a, i64 1 seq_cst
ret i64 %1
}
@@ -109,6 +161,18 @@ define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind {
; RV32IA-NEXT: amoadd.w.aqrl a0, a2, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_neg:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: sub a2, a1, a2
+; RV32I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a1, (a0)
+; RV32I-ZALRSC-NEXT: sub a3, a1, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB2_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a1
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_sub_i32_neg:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -125,6 +189,18 @@ define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind {
; RV64IA-NEXT: sub a2, a2, a1
; RV64IA-NEXT: amoadd.w.aqrl a0, a2, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_neg:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: subw a2, a1, a2
+; RV64I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a1, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB2_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
%b = sub i32 %x, %y
%1 = atomicrmw sub ptr %a, i32 %b seq_cst
ret i32 %1
@@ -159,6 +235,20 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind {
; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_neg:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sltu a5, a1, a3
+; RV32I-ZALRSC-NEXT: sub a2, a2, a4
+; RV32I-ZALRSC-NEXT: sub a2, a2, a5
+; RV32I-ZALRSC-NEXT: sub a1, a1, a3
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_sub_i64_neg:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -175,6 +265,18 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind {
; RV64IA-NEXT: sub a2, a2, a1
; RV64IA-NEXT: amoadd.d.aqrl a0, a2, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_neg:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sub a2, a1, a2
+; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a1, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a1, a2
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB3_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
%b = sub i64 %x, %y
%1 = atomicrmw sub ptr %a, i64 %b seq_cst
ret i64 %1
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index 1213256..26feb83 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -1,12 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s
; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS,RV32IA-WMO,RV32IA-WMO-NOZACAS %s
; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS,RV32IA-TSO,RV32IA-TSO-NOZACAS %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS,RV64IA-WMO,RV64IA-WMO-NOZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
@@ -50,6 +54,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB0_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -80,6 +104,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB0_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -174,6 +218,26 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB1_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -224,6 +288,26 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB1_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -378,6 +462,26 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB2_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -428,6 +532,26 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB2_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -582,6 +706,26 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB3_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -632,6 +776,26 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB3_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -786,6 +950,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB4_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -816,6 +1000,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB4_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -914,6 +1118,22 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB5_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -936,6 +1156,22 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB5_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -1004,6 +1240,22 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1037,6 +1289,22 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1138,6 +1406,22 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB7_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1171,6 +1455,22 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB7_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1272,6 +1572,22 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1305,6 +1621,22 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1406,6 +1738,22 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1439,6 +1787,22 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1540,6 +1904,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB10_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -1561,6 +1940,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB10_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -1630,6 +2024,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB11_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1661,6 +2070,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB11_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1760,6 +2184,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB12_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1791,6 +2230,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB12_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1890,6 +2344,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB13_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -1921,6 +2390,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB13_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -2020,6 +2504,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a2, 255
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB14_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -2051,6 +2550,21 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a2, 255
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB14_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -2149,6 +2663,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB15_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -2179,6 +2713,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB15_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -2273,6 +2827,26 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB16_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2323,6 +2897,26 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB16_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2477,6 +3071,26 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB17_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2527,6 +3141,26 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB17_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2681,6 +3315,26 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB18_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2731,6 +3385,26 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB18_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -2885,6 +3559,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB19_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -2915,6 +3609,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB19_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3009,6 +3723,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB20_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3039,6 +3773,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB20_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3137,6 +3891,26 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB21_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3187,6 +3961,26 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB21_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3345,6 +4139,26 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB22_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3395,6 +4209,26 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB22_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3553,6 +4387,26 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB23_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3603,6 +4457,26 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB23_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -3761,6 +4635,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB24_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3791,6 +4685,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB24_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3889,6 +4803,25 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB25_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3913,6 +4846,25 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB25_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3989,6 +4941,25 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB26_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4027,6 +4998,25 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB26_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4145,6 +5135,25 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB27_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4183,6 +5192,25 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB27_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4301,6 +5329,25 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB28_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4339,6 +5386,25 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB28_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4457,6 +5523,25 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB29_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4495,6 +5580,25 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB29_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4613,6 +5717,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB30_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4644,6 +5769,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB30_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4865,6 +6011,27 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB31_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -4917,6 +6084,27 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB31_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5201,6 +6389,27 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB32_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5253,6 +6462,27 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB32_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5537,6 +6767,27 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB33_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5589,6 +6840,27 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB33_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -5873,6 +7145,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB34_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -5904,6 +7197,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB34_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6129,6 +7443,21 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB35_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6149,6 +7478,21 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB35_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6213,6 +7557,21 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB36_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6243,6 +7602,21 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB36_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6337,6 +7711,21 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB37_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6367,6 +7756,21 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB37_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6461,6 +7865,21 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB38_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6491,6 +7910,21 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB38_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6585,6 +8019,21 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB39_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6615,6 +8064,21 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB39_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6709,6 +8173,21 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB40_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6729,6 +8208,21 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB40_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -6793,6 +8287,21 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB41_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6823,6 +8332,21 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB41_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6917,6 +8441,21 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB42_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -6947,6 +8486,21 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB42_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7041,6 +8595,21 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB43_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7071,6 +8640,21 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB43_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7165,6 +8749,21 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB44_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7195,6 +8794,21 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB44_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7321,6 +8935,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB45_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB45_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -7392,6 +9035,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB45_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB45_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -7545,6 +9217,35 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB46_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB46_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7645,6 +9346,35 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB46_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB46_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7885,6 +9615,35 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB47_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB47_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -7985,6 +9744,35 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB47_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB47_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -8225,6 +10013,35 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB48_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB48_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -8325,6 +10142,35 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB48_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB48_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -8565,6 +10411,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB49_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB49_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -8636,6 +10511,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB49_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB49_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -8789,6 +10693,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB50_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB50_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -8860,6 +10793,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB50_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB50_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -9013,6 +10975,35 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB51_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB51_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -9113,6 +11104,35 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB51_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB51_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -9353,6 +11373,35 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB52_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB52_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -9453,6 +11502,35 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB52_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB52_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -9693,6 +11771,35 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB53_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB53_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -9793,6 +11900,35 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB53_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB53_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -10033,6 +12169,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB54_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB54_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -10104,6 +12269,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB54_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB54_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -10255,6 +12449,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB55_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB55_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -10319,6 +12537,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB55_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB55_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -10455,6 +12697,30 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB56_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB56_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -10543,6 +12809,30 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB56_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB56_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -10751,6 +13041,30 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB57_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB57_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -10839,6 +13153,30 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB57_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB57_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -11047,6 +13385,30 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB58_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB58_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -11135,6 +13497,30 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB58_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB58_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -11343,6 +13729,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB59_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB59_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -11407,6 +13817,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB59_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB59_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -11543,6 +13977,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB60_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB60_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -11607,6 +14065,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB60_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB60_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -11743,6 +14225,30 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB61_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB61_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -11831,6 +14337,30 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB61_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB61_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -12039,6 +14569,30 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB62_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB62_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -12127,6 +14681,30 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB62_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB62_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -12335,6 +14913,30 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB63_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB63_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -12423,6 +15025,30 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB63_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB63_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -12631,6 +15257,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB64_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB64_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -12695,6 +15345,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB64_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB64_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -12801,6 +15475,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB65_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -12832,6 +15527,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB65_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -12929,6 +15645,27 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB66_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -12981,6 +15718,27 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB66_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -13141,6 +15899,27 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB67_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -13193,6 +15972,27 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB67_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -13353,6 +16153,27 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB68_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -13405,6 +16226,27 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB68_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -13565,6 +16407,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB69_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -13596,6 +16459,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB69_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -13697,6 +16581,23 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB70_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -13720,6 +16621,23 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB70_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -13791,6 +16709,23 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB71_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -13826,6 +16761,23 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB71_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -13933,6 +16885,23 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB72_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -13968,6 +16937,23 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB72_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14075,6 +17061,23 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB73_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14110,6 +17113,23 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB73_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14217,6 +17237,23 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: not a2, a2
+; RV32I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV32I-ZALRSC-NEXT: and a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB74_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14252,6 +17289,23 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: not a2, a2
+; RV64I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV64I-ZALRSC-NEXT: and a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB74_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14360,6 +17414,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB75_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -14383,6 +17453,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB75_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a1, a0, -4
@@ -14456,6 +17542,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB76_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14490,6 +17592,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB76_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14596,6 +17714,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB77_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14630,6 +17764,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB77_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14736,6 +17886,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB78_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14770,6 +17936,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB78_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14876,6 +18058,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a1, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a2, 16
+; RV32I-ZALRSC-NEXT: addi a2, a2, -1
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV32I-ZALRSC-NEXT: or a4, a3, a2
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB79_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -14910,6 +18108,22 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a2, 16
+; RV64I-ZALRSC-NEXT: addi a2, a2, -1
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1)
+; RV64I-ZALRSC-NEXT: or a4, a3, a2
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB79_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
@@ -15014,6 +18228,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB80_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15045,6 +18280,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB80_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15142,6 +18398,27 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB81_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15194,6 +18471,27 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB81_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15354,6 +18652,27 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB82_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15406,6 +18725,27 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB82_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15566,6 +18906,27 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB83_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15618,6 +18979,27 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB83_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -15778,6 +19160,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB84_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15809,6 +19212,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB84_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15906,6 +19330,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB85_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -15937,6 +19382,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB85_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16038,6 +19504,27 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB86_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16090,6 +19577,27 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB86_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16254,6 +19762,27 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB87_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16306,6 +19835,27 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB87_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16470,6 +20020,27 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB88_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16522,6 +20093,27 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB88_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16686,6 +20278,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB89_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16717,6 +20330,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB89_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16818,6 +20452,26 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB90_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16843,6 +20497,26 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB90_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -16922,6 +20596,26 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB91_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -16962,6 +20656,26 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB91_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17086,6 +20800,26 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB92_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17126,6 +20860,26 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB92_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17250,6 +21004,26 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB93_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17290,6 +21064,26 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB93_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17414,6 +21208,26 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB94_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17454,6 +21268,26 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB94_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17578,6 +21412,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB95_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -17610,6 +21466,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB95_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -17838,6 +21716,28 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB96_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -17892,6 +21792,28 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB96_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18186,6 +22108,28 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB97_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18240,6 +22184,28 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB97_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18534,6 +22500,28 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB98_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18588,6 +22576,28 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB98_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -18882,6 +22892,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB99_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -18914,6 +22946,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB99_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19146,6 +23200,22 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB100_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19167,6 +23237,22 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB100_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19234,6 +23320,22 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB101_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19266,6 +23368,22 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB101_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19366,6 +23484,22 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB102_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19398,6 +23532,22 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB102_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19498,6 +23648,22 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB103_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19530,6 +23696,22 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB103_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19630,6 +23812,22 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB104_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19662,6 +23860,22 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB104_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19762,6 +23976,22 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB105_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19783,6 +24013,22 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB105_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -19850,6 +24096,22 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB106_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19882,6 +24144,22 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB106_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -19982,6 +24260,22 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB107_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20014,6 +24308,22 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB107_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20114,6 +24424,22 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB108_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20146,6 +24472,22 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB108_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20246,6 +24588,22 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB109_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20278,6 +24636,22 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB109_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20410,6 +24784,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB110_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB110_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -20483,6 +24888,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB110_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB110_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -20642,6 +25078,37 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB111_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB111_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20746,6 +25213,37 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB111_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB111_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -20998,6 +25496,37 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB112_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB112_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -21102,6 +25631,37 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB112_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB112_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -21354,6 +25914,37 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB113_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB113_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -21458,6 +26049,37 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB113_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB113_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -21710,6 +26332,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB114_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB114_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -21783,6 +26436,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB114_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB114_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -21942,6 +26626,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB115_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB115_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -22015,6 +26730,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB115_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB115_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -22174,6 +26920,37 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB116_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB116_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -22278,6 +27055,37 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB116_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB116_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -22530,6 +27338,37 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB117_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB117_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -22634,6 +27473,37 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB117_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB117_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -22886,6 +27756,37 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB118_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB118_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -22990,6 +27891,37 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB118_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB118_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -23242,6 +28174,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB119_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB119_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -23315,6 +28278,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB119_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB119_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -23476,6 +28470,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB120_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -23545,6 +28564,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB120_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -23688,6 +28732,31 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB121_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -23782,6 +28851,31 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB121_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -24000,6 +29094,31 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB122_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB122_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -24094,6 +29213,31 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB122_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB122_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -24312,6 +29456,31 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB123_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -24406,6 +29575,31 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB123_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -24624,6 +29818,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB124_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -24693,6 +29912,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB124_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -24836,6 +30080,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB125_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -24905,6 +30174,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB125_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -25048,6 +30342,31 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB126_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -25142,6 +30461,31 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB126_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -25360,6 +30704,31 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB127_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -25454,6 +30823,31 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB127_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -25672,6 +31066,31 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB128_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -25766,6 +31185,31 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB128_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
@@ -25984,6 +31428,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB129_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB129_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -26053,6 +31522,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB129_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB129_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: andi a2, a0, -4
@@ -26162,6 +31656,17 @@ define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB130_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xchg_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: amoswap.w a0, a1, (a0)
@@ -26177,6 +31682,17 @@ define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB130_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_xchg_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoswap.w a0, a1, (a0)
@@ -26196,6 +31712,17 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB131_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoswap.w.aq a0, a1, (a0)
@@ -26216,6 +31743,17 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB131_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoswap.w.aq a0, a1, (a0)
@@ -26240,6 +31778,17 @@ define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB132_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoswap.w.rl a0, a1, (a0)
@@ -26260,6 +31809,17 @@ define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB132_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoswap.w.rl a0, a1, (a0)
@@ -26284,6 +31844,17 @@ define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB133_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0)
@@ -26304,6 +31875,17 @@ define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB133_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0)
@@ -26328,6 +31910,17 @@ define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB134_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_xchg_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0)
@@ -26348,6 +31941,17 @@ define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB134_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xchg_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoswap.w.aqrl a0, a1, (a0)
@@ -26372,6 +31976,17 @@ define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB135_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: add a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB135_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_add_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: amoadd.w a0, a1, (a0)
@@ -26387,6 +32002,17 @@ define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB135_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB135_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_add_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoadd.w a0, a1, (a0)
@@ -26406,6 +32032,17 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: add a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB136_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_add_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoadd.w.aq a0, a1, (a0)
@@ -26426,6 +32063,17 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB136_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_add_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoadd.w.aq a0, a1, (a0)
@@ -26450,6 +32098,17 @@ define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: add a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB137_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_add_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoadd.w.rl a0, a1, (a0)
@@ -26470,6 +32129,17 @@ define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB137_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_add_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoadd.w.rl a0, a1, (a0)
@@ -26494,6 +32164,17 @@ define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: add a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB138_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_add_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0)
@@ -26514,6 +32195,17 @@ define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB138_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_add_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0)
@@ -26538,6 +32230,17 @@ define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: add a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB139_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_add_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0)
@@ -26558,6 +32261,17 @@ define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB139_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_add_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoadd.w.aqrl a0, a1, (a0)
@@ -26582,6 +32296,17 @@ define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: sub a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB140_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_sub_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: neg a1, a1
@@ -26598,6 +32323,17 @@ define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB140_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_sub_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: neg a1, a1
@@ -26618,6 +32354,17 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: sub a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB141_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_sub_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: neg a1, a1
@@ -26640,6 +32387,17 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB141_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_sub_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: neg a1, a1
@@ -26666,6 +32424,17 @@ define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: sub a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB142_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_sub_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: neg a1, a1
@@ -26688,6 +32457,17 @@ define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB142_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_sub_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: neg a1, a1
@@ -26714,6 +32494,17 @@ define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB143_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: sub a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB143_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_sub_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: neg a1, a1
@@ -26736,6 +32527,17 @@ define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB143_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB143_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_sub_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: neg a1, a1
@@ -26762,6 +32564,17 @@ define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: sub a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB144_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_sub_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: neg a1, a1
@@ -26784,6 +32597,17 @@ define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB144_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_sub_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: neg a1, a1
@@ -26810,6 +32634,17 @@ define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB145_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_and_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: amoand.w a0, a1, (a0)
@@ -26825,6 +32660,17 @@ define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB145_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_and_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoand.w a0, a1, (a0)
@@ -26844,6 +32690,17 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB146_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_and_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoand.w.aq a0, a1, (a0)
@@ -26864,6 +32721,17 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB146_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_and_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoand.w.aq a0, a1, (a0)
@@ -26888,6 +32756,17 @@ define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB147_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_and_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoand.w.rl a0, a1, (a0)
@@ -26908,6 +32787,17 @@ define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB147_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_and_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoand.w.rl a0, a1, (a0)
@@ -26932,6 +32822,17 @@ define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB148_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_and_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0)
@@ -26952,6 +32853,17 @@ define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB148_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_and_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0)
@@ -26976,6 +32888,17 @@ define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB149_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_and_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0)
@@ -26996,6 +32919,17 @@ define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB149_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_and_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoand.w.aqrl a0, a1, (a0)
@@ -27020,6 +32954,18 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB150_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
@@ -27042,6 +32988,18 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB150_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
@@ -27200,6 +33158,18 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB151_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
@@ -27234,6 +33204,18 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB151_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
@@ -27432,6 +33414,18 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB152_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_release:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
@@ -27466,6 +33460,18 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB152_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
@@ -27664,6 +33670,18 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB153_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
; RV32IA-WMO-NOZACAS: # %bb.0:
; RV32IA-WMO-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
@@ -27698,6 +33716,18 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB153_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
@@ -27896,6 +33926,18 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB154_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
; RV32IA-NOZACAS: # %bb.0:
; RV32IA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
@@ -27918,6 +33960,18 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB154_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
@@ -28112,6 +34166,17 @@ define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB155_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: or a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB155_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_or_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: amoor.w a0, a1, (a0)
@@ -28127,6 +34192,17 @@ define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB155_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB155_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_or_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoor.w a0, a1, (a0)
@@ -28146,6 +34222,17 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: or a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB156_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_or_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoor.w.aq a0, a1, (a0)
@@ -28166,6 +34253,17 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB156_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_or_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoor.w.aq a0, a1, (a0)
@@ -28190,6 +34288,17 @@ define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: or a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB157_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_or_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoor.w.rl a0, a1, (a0)
@@ -28210,6 +34319,17 @@ define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB157_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_or_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoor.w.rl a0, a1, (a0)
@@ -28234,6 +34354,17 @@ define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB158_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: or a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB158_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_or_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0)
@@ -28254,6 +34385,17 @@ define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB158_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB158_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_or_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0)
@@ -28278,6 +34420,17 @@ define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB159_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: or a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB159_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_or_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0)
@@ -28298,6 +34451,17 @@ define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB159_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB159_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_or_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoor.w.aqrl a0, a1, (a0)
@@ -28322,6 +34486,17 @@ define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB160_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: xor a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB160_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xor_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: amoxor.w a0, a1, (a0)
@@ -28337,6 +34512,17 @@ define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB160_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB160_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_xor_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoxor.w a0, a1, (a0)
@@ -28356,6 +34542,17 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB161_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: xor a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB161_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_xor_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoxor.w.aq a0, a1, (a0)
@@ -28376,6 +34573,17 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB161_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB161_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xor_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoxor.w.aq a0, a1, (a0)
@@ -28400,6 +34608,17 @@ define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB162_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: xor a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB162_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_xor_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoxor.w.rl a0, a1, (a0)
@@ -28420,6 +34639,17 @@ define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB162_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB162_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xor_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoxor.w.rl a0, a1, (a0)
@@ -28444,6 +34674,17 @@ define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB163_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: xor a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB163_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_xor_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0)
@@ -28464,6 +34705,17 @@ define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB163_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB163_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xor_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0)
@@ -28488,6 +34740,17 @@ define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB164_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: xor a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB164_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_xor_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0)
@@ -28508,6 +34771,17 @@ define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB164_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB164_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xor_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoxor.w.aqrl a0, a1, (a0)
@@ -28558,6 +34832,21 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB165_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB165_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB165_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB165_3: # in Loop: Header=BB165_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB165_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_max_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: amomax.w a0, a1, (a0)
@@ -28602,6 +34891,22 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB165_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB165_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB165_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB165_3: # in Loop: Header=BB165_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB165_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_max_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomax.w a0, a1, (a0)
@@ -28647,6 +34952,21 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB166_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB166_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB166_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB166_3: # in Loop: Header=BB166_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB166_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_max_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomax.w.aq a0, a1, (a0)
@@ -28696,6 +35016,22 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB166_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB166_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB166_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB166_3: # in Loop: Header=BB166_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB166_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_max_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomax.w.aq a0, a1, (a0)
@@ -28746,6 +35082,21 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB167_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB167_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB167_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB167_3: # in Loop: Header=BB167_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB167_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_max_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomax.w.rl a0, a1, (a0)
@@ -28795,6 +35146,22 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB167_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB167_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB167_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB167_3: # in Loop: Header=BB167_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB167_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_max_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomax.w.rl a0, a1, (a0)
@@ -28845,6 +35212,21 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB168_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB168_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB168_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB168_3: # in Loop: Header=BB168_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB168_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_max_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0)
@@ -28894,6 +35276,22 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB168_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB168_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB168_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB168_3: # in Loop: Header=BB168_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB168_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_max_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0)
@@ -28944,6 +35342,21 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB169_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB169_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB169_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB169_3: # in Loop: Header=BB169_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB169_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_max_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0)
@@ -28993,6 +35406,22 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB169_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB169_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB169_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB169_3: # in Loop: Header=BB169_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB169_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_max_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomax.w.aqrl a0, a1, (a0)
@@ -29043,6 +35472,21 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB170_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB170_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB170_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB170_3: # in Loop: Header=BB170_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB170_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_min_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: amomin.w a0, a1, (a0)
@@ -29087,6 +35531,22 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB170_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB170_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB170_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB170_3: # in Loop: Header=BB170_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB170_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_min_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomin.w a0, a1, (a0)
@@ -29132,6 +35592,21 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB171_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB171_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB171_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB171_3: # in Loop: Header=BB171_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB171_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_min_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomin.w.aq a0, a1, (a0)
@@ -29181,6 +35656,22 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB171_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB171_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB171_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB171_3: # in Loop: Header=BB171_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB171_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_min_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomin.w.aq a0, a1, (a0)
@@ -29231,6 +35722,21 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB172_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB172_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB172_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB172_3: # in Loop: Header=BB172_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB172_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_min_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomin.w.rl a0, a1, (a0)
@@ -29280,6 +35786,22 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB172_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB172_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB172_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB172_3: # in Loop: Header=BB172_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB172_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_min_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomin.w.rl a0, a1, (a0)
@@ -29330,6 +35852,21 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB173_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB173_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB173_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB173_3: # in Loop: Header=BB173_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB173_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_min_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0)
@@ -29379,6 +35916,22 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB173_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB173_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB173_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB173_3: # in Loop: Header=BB173_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB173_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_min_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0)
@@ -29429,6 +35982,21 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB174_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB174_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB174_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB174_3: # in Loop: Header=BB174_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB174_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_min_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0)
@@ -29478,6 +36046,22 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB174_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB174_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB174_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB174_3: # in Loop: Header=BB174_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB174_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_min_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomin.w.aqrl a0, a1, (a0)
@@ -29528,6 +36112,21 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB175_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB175_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB175_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB175_3: # in Loop: Header=BB175_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB175_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umax_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: amomaxu.w a0, a1, (a0)
@@ -29572,6 +36171,22 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB175_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB175_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB175_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB175_3: # in Loop: Header=BB175_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB175_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_umax_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomaxu.w a0, a1, (a0)
@@ -29617,6 +36232,21 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB176_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB176_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB176_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB176_3: # in Loop: Header=BB176_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB176_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomaxu.w.aq a0, a1, (a0)
@@ -29666,6 +36296,22 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB176_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB176_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB176_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB176_3: # in Loop: Header=BB176_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB176_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umax_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomaxu.w.aq a0, a1, (a0)
@@ -29716,6 +36362,21 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB177_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB177_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB177_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB177_3: # in Loop: Header=BB177_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB177_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_umax_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomaxu.w.rl a0, a1, (a0)
@@ -29765,6 +36426,22 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB177_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB177_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB177_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB177_3: # in Loop: Header=BB177_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB177_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umax_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomaxu.w.rl a0, a1, (a0)
@@ -29815,6 +36492,21 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB178_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB178_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB178_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB178_3: # in Loop: Header=BB178_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB178_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0)
@@ -29864,6 +36556,22 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB178_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB178_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB178_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB178_3: # in Loop: Header=BB178_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB178_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umax_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0)
@@ -29914,6 +36622,21 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB179_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB179_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB179_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB179_3: # in Loop: Header=BB179_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB179_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_umax_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0)
@@ -29963,6 +36686,22 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB179_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB179_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB179_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB179_3: # in Loop: Header=BB179_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB179_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umax_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomaxu.w.aqrl a0, a1, (a0)
@@ -30013,6 +36752,21 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB180_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB180_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB180_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB180_3: # in Loop: Header=BB180_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB180_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umin_i32_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: amominu.w a0, a1, (a0)
@@ -30057,6 +36811,22 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB180_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB180_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB180_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB180_3: # in Loop: Header=BB180_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB180_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_umin_i32_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amominu.w a0, a1, (a0)
@@ -30102,6 +36872,21 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB181_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB181_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB181_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB181_3: # in Loop: Header=BB181_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB181_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acquire:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amominu.w.aq a0, a1, (a0)
@@ -30151,6 +36936,22 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB181_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB181_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB181_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB181_3: # in Loop: Header=BB181_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB181_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umin_i32_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amominu.w.aq a0, a1, (a0)
@@ -30201,6 +37002,21 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB182_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB182_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB182_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB182_3: # in Loop: Header=BB182_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB182_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_umin_i32_release:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amominu.w.rl a0, a1, (a0)
@@ -30250,6 +37066,22 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB182_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB182_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB182_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB182_3: # in Loop: Header=BB182_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB182_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umin_i32_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amominu.w.rl a0, a1, (a0)
@@ -30300,6 +37132,21 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB183_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB183_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB183_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB183_3: # in Loop: Header=BB183_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB183_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acq_rel:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0)
@@ -30349,6 +37196,22 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB183_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aq a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB183_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB183_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB183_3: # in Loop: Header=BB183_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB183_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umin_i32_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0)
@@ -30399,6 +37262,21 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB184_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB184_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB184_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB184_3: # in Loop: Header=BB184_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB184_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-WMO-LABEL: atomicrmw_umin_i32_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0)
@@ -30448,6 +37326,22 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB184_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB184_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB184_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB184_3: # in Loop: Header=BB184_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB184_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umin_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amominu.w.aqrl a0, a1, (a0)
@@ -30472,6 +37366,16 @@ define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_exchange_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xchg_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30492,6 +37396,17 @@ define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB185_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB185_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_xchg_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoswap.d a0, a1, (a0)
@@ -30511,6 +37426,16 @@ define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 2
+; RV32I-ZALRSC-NEXT: call __atomic_exchange_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xchg_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30531,6 +37456,17 @@ define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB186_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB186_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoswap.d.aq a0, a1, (a0)
@@ -30555,6 +37491,16 @@ define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_exchange_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xchg_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30575,6 +37521,17 @@ define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB187_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB187_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoswap.d.rl a0, a1, (a0)
@@ -30599,6 +37556,16 @@ define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_exchange_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xchg_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30619,6 +37586,17 @@ define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB188_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB188_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoswap.d.aqrl a0, a1, (a0)
@@ -30643,6 +37621,16 @@ define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_exchange_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xchg_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30663,6 +37651,17 @@ define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB189_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB189_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xchg_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoswap.d.aqrl a0, a1, (a0)
@@ -30687,6 +37686,16 @@ define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_add_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30707,6 +37716,17 @@ define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB190_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB190_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_add_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoadd.d a0, a1, (a0)
@@ -30726,6 +37746,16 @@ define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 2
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_add_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30746,6 +37776,17 @@ define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB191_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB191_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_add_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoadd.d.aq a0, a1, (a0)
@@ -30770,6 +37811,16 @@ define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_add_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30790,6 +37841,17 @@ define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB192_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB192_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_add_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoadd.d.rl a0, a1, (a0)
@@ -30814,6 +37876,16 @@ define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_add_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30834,6 +37906,17 @@ define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB193_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB193_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_add_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoadd.d.aqrl a0, a1, (a0)
@@ -30858,6 +37941,16 @@ define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_add_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30878,6 +37971,17 @@ define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB194_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB194_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_add_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoadd.d.aqrl a0, a1, (a0)
@@ -30902,6 +38006,16 @@ define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_sub_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30922,6 +38036,17 @@ define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB195_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB195_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_sub_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: neg a1, a1
@@ -30942,6 +38067,16 @@ define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 2
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_sub_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -30962,6 +38097,17 @@ define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB196_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB196_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_sub_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: neg a1, a1
@@ -30988,6 +38134,16 @@ define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_sub_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31008,6 +38164,17 @@ define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB197_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB197_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_sub_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: neg a1, a1
@@ -31034,6 +38201,16 @@ define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_sub_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31054,6 +38231,17 @@ define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB198_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB198_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_sub_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: neg a1, a1
@@ -31080,6 +38268,16 @@ define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_sub_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31100,6 +38298,17 @@ define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB199_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB199_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_sub_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: neg a1, a1
@@ -31126,6 +38335,16 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_and_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31146,6 +38365,17 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB200_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB200_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_and_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoand.d a0, a1, (a0)
@@ -31165,6 +38395,16 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 2
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_and_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31185,6 +38425,17 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB201_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB201_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_and_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoand.d.aq a0, a1, (a0)
@@ -31209,6 +38460,16 @@ define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_and_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31229,6 +38490,17 @@ define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB202_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB202_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_and_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoand.d.rl a0, a1, (a0)
@@ -31253,6 +38525,16 @@ define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_and_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31273,6 +38555,17 @@ define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB203_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB203_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_and_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoand.d.aqrl a0, a1, (a0)
@@ -31297,6 +38590,16 @@ define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_and_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31317,6 +38620,17 @@ define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB204_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB204_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_and_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoand.d.aqrl a0, a1, (a0)
@@ -31341,6 +38655,16 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_nand_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31361,6 +38685,18 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB205_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB205_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i64_monotonic:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: .LBB205_1: # =>This Inner Loop Header: Depth=1
@@ -31453,6 +38789,16 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 2
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_nand_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31473,6 +38819,18 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB206_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_acquire:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: .LBB206_1: # =>This Inner Loop Header: Depth=1
@@ -31591,6 +38949,16 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_nand_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31611,6 +38979,18 @@ define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB207_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_release:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: .LBB207_1: # =>This Inner Loop Header: Depth=1
@@ -31729,6 +39109,16 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_nand_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31749,6 +39139,18 @@ define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB208_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i64_acq_rel:
; RV64IA-WMO-NOZACAS: # %bb.0:
; RV64IA-WMO-NOZACAS-NEXT: .LBB208_1: # =>This Inner Loop Header: Depth=1
@@ -31867,6 +39269,16 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_nand_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -31887,6 +39299,18 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB209_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB209_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i64_seq_cst:
; RV64IA-NOZACAS: # %bb.0:
; RV64IA-NOZACAS-NEXT: .LBB209_1: # =>This Inner Loop Header: Depth=1
@@ -31997,6 +39421,16 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_or_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32017,6 +39451,17 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB210_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB210_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_or_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoor.d a0, a1, (a0)
@@ -32036,6 +39481,16 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 2
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_or_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32056,6 +39511,17 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB211_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB211_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_or_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoor.d.aq a0, a1, (a0)
@@ -32080,6 +39546,16 @@ define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_or_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32100,6 +39576,17 @@ define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB212_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB212_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_or_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoor.d.rl a0, a1, (a0)
@@ -32124,6 +39611,16 @@ define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_or_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32144,6 +39641,17 @@ define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB213_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB213_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_or_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoor.d.aqrl a0, a1, (a0)
@@ -32168,6 +39676,16 @@ define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_or_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32188,6 +39706,17 @@ define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB214_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB214_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_or_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoor.d.aqrl a0, a1, (a0)
@@ -32212,6 +39741,16 @@ define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xor_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32232,6 +39771,17 @@ define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB215_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB215_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_xor_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoxor.d a0, a1, (a0)
@@ -32251,6 +39801,16 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 2
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xor_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32271,6 +39831,17 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB216_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB216_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xor_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoxor.d.aq a0, a1, (a0)
@@ -32295,6 +39866,16 @@ define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 3
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xor_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32315,6 +39896,17 @@ define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB217_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB217_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xor_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoxor.d.rl a0, a1, (a0)
@@ -32339,6 +39931,16 @@ define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 4
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xor_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32359,6 +39961,17 @@ define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB218_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB218_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xor_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoxor.d.aqrl a0, a1, (a0)
@@ -32383,6 +39996,16 @@ define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 5
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_xor_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
@@ -32403,6 +40026,17 @@ define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB219_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB219_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_xor_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amoxor.d.aqrl a0, a1, (a0)
@@ -32471,6 +40105,60 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB220_2
+; RV32I-ZALRSC-NEXT: .LBB220_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB220_7
+; RV32I-ZALRSC-NEXT: .LBB220_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB220_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB220_5
+; RV32I-ZALRSC-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB220_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB220_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB220_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB220_1
+; RV32I-ZALRSC-NEXT: .LBB220_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_max_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -32561,6 +40249,21 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB220_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB220_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB220_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB220_3: # in Loop: Header=BB220_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB220_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_max_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomax.d a0, a1, (a0)
@@ -32624,6 +40327,60 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB221_2
+; RV32I-ZALRSC-NEXT: .LBB221_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 2
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB221_7
+; RV32I-ZALRSC-NEXT: .LBB221_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB221_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB221_5
+; RV32I-ZALRSC-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB221_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB221_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB221_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB221_1
+; RV32I-ZALRSC-NEXT: .LBB221_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_max_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -32714,6 +40471,21 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB221_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB221_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB221_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB221_3: # in Loop: Header=BB221_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB221_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_max_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomax.d.aq a0, a1, (a0)
@@ -32782,6 +40554,60 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB222_2
+; RV32I-ZALRSC-NEXT: .LBB222_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 3
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB222_7
+; RV32I-ZALRSC-NEXT: .LBB222_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB222_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB222_5
+; RV32I-ZALRSC-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB222_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB222_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB222_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB222_1
+; RV32I-ZALRSC-NEXT: .LBB222_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_max_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -32872,6 +40698,21 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB222_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB222_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB222_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB222_3: # in Loop: Header=BB222_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB222_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_max_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomax.d.rl a0, a1, (a0)
@@ -32940,6 +40781,60 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB223_2
+; RV32I-ZALRSC-NEXT: .LBB223_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 4
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB223_7
+; RV32I-ZALRSC-NEXT: .LBB223_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB223_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB223_5
+; RV32I-ZALRSC-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB223_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB223_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB223_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB223_1
+; RV32I-ZALRSC-NEXT: .LBB223_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_max_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -33030,6 +40925,21 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB223_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB223_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB223_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB223_3: # in Loop: Header=BB223_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB223_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_max_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomax.d.aqrl a0, a1, (a0)
@@ -33098,6 +41008,60 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB224_2
+; RV32I-ZALRSC-NEXT: .LBB224_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 5
+; RV32I-ZALRSC-NEXT: li a5, 5
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB224_7
+; RV32I-ZALRSC-NEXT: .LBB224_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB224_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB224_5
+; RV32I-ZALRSC-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB224_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB224_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB224_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB224_1
+; RV32I-ZALRSC-NEXT: .LBB224_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_max_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -33188,6 +41152,21 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB224_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB224_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB224_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB224_3: # in Loop: Header=BB224_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB224_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_max_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomax.d.aqrl a0, a1, (a0)
@@ -33256,6 +41235,60 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB225_2
+; RV32I-ZALRSC-NEXT: .LBB225_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB225_7
+; RV32I-ZALRSC-NEXT: .LBB225_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB225_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB225_5
+; RV32I-ZALRSC-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB225_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB225_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB225_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB225_1
+; RV32I-ZALRSC-NEXT: .LBB225_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_min_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -33346,6 +41379,21 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB225_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB225_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB225_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB225_3: # in Loop: Header=BB225_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB225_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_min_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomin.d a0, a1, (a0)
@@ -33409,6 +41457,60 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB226_2
+; RV32I-ZALRSC-NEXT: .LBB226_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 2
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB226_7
+; RV32I-ZALRSC-NEXT: .LBB226_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB226_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB226_5
+; RV32I-ZALRSC-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB226_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB226_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB226_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB226_1
+; RV32I-ZALRSC-NEXT: .LBB226_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_min_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -33499,6 +41601,21 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB226_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB226_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB226_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB226_3: # in Loop: Header=BB226_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB226_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_min_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomin.d.aq a0, a1, (a0)
@@ -33567,6 +41684,60 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB227_2
+; RV32I-ZALRSC-NEXT: .LBB227_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 3
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB227_7
+; RV32I-ZALRSC-NEXT: .LBB227_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB227_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB227_5
+; RV32I-ZALRSC-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB227_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB227_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB227_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB227_1
+; RV32I-ZALRSC-NEXT: .LBB227_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_min_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -33657,6 +41828,21 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB227_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB227_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB227_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB227_3: # in Loop: Header=BB227_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB227_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_min_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomin.d.rl a0, a1, (a0)
@@ -33725,6 +41911,60 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB228_2
+; RV32I-ZALRSC-NEXT: .LBB228_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 4
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB228_7
+; RV32I-ZALRSC-NEXT: .LBB228_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB228_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB228_5
+; RV32I-ZALRSC-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB228_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB228_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB228_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB228_1
+; RV32I-ZALRSC-NEXT: .LBB228_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_min_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -33815,6 +42055,21 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB228_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB228_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB228_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB228_3: # in Loop: Header=BB228_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB228_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_min_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomin.d.aqrl a0, a1, (a0)
@@ -33883,6 +42138,60 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB229_2
+; RV32I-ZALRSC-NEXT: .LBB229_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 5
+; RV32I-ZALRSC-NEXT: li a5, 5
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB229_7
+; RV32I-ZALRSC-NEXT: .LBB229_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB229_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB229_5
+; RV32I-ZALRSC-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB229_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB229_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB229_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB229_1
+; RV32I-ZALRSC-NEXT: .LBB229_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_min_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -33973,6 +42282,21 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB229_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB229_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB229_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB229_3: # in Loop: Header=BB229_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB229_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_min_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomin.d.aqrl a0, a1, (a0)
@@ -34041,6 +42365,60 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB230_2
+; RV32I-ZALRSC-NEXT: .LBB230_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB230_7
+; RV32I-ZALRSC-NEXT: .LBB230_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB230_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB230_5
+; RV32I-ZALRSC-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB230_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB230_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB230_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB230_1
+; RV32I-ZALRSC-NEXT: .LBB230_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umax_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -34131,6 +42509,21 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB230_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB230_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB230_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB230_3: # in Loop: Header=BB230_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB230_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_umax_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomaxu.d a0, a1, (a0)
@@ -34194,6 +42587,60 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB231_2
+; RV32I-ZALRSC-NEXT: .LBB231_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 2
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB231_7
+; RV32I-ZALRSC-NEXT: .LBB231_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB231_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB231_5
+; RV32I-ZALRSC-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB231_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB231_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB231_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB231_1
+; RV32I-ZALRSC-NEXT: .LBB231_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umax_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -34284,6 +42731,21 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB231_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB231_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB231_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB231_3: # in Loop: Header=BB231_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB231_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomaxu.d.aq a0, a1, (a0)
@@ -34352,6 +42814,60 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB232_2
+; RV32I-ZALRSC-NEXT: .LBB232_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 3
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB232_7
+; RV32I-ZALRSC-NEXT: .LBB232_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB232_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB232_5
+; RV32I-ZALRSC-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB232_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB232_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB232_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB232_1
+; RV32I-ZALRSC-NEXT: .LBB232_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umax_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -34442,6 +42958,21 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB232_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB232_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB232_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB232_3: # in Loop: Header=BB232_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB232_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umax_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomaxu.d.rl a0, a1, (a0)
@@ -34510,6 +43041,60 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB233_2
+; RV32I-ZALRSC-NEXT: .LBB233_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 4
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB233_7
+; RV32I-ZALRSC-NEXT: .LBB233_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB233_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB233_5
+; RV32I-ZALRSC-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB233_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB233_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB233_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB233_1
+; RV32I-ZALRSC-NEXT: .LBB233_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umax_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -34600,6 +43185,21 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB233_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB233_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB233_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB233_3: # in Loop: Header=BB233_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB233_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomaxu.d.aqrl a0, a1, (a0)
@@ -34668,6 +43268,60 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB234_2
+; RV32I-ZALRSC-NEXT: .LBB234_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 5
+; RV32I-ZALRSC-NEXT: li a5, 5
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB234_7
+; RV32I-ZALRSC-NEXT: .LBB234_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB234_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB234_5
+; RV32I-ZALRSC-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB234_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB234_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB234_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB234_1
+; RV32I-ZALRSC-NEXT: .LBB234_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umax_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -34758,6 +43412,21 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB234_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB234_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB234_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB234_3: # in Loop: Header=BB234_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB234_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umax_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amomaxu.d.aqrl a0, a1, (a0)
@@ -34826,6 +43495,60 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB235_2
+; RV32I-ZALRSC-NEXT: .LBB235_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB235_7
+; RV32I-ZALRSC-NEXT: .LBB235_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB235_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB235_5
+; RV32I-ZALRSC-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB235_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB235_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB235_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB235_1
+; RV32I-ZALRSC-NEXT: .LBB235_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umin_i64_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -34916,6 +43639,21 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB235_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB235_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB235_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB235_3: # in Loop: Header=BB235_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB235_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-LABEL: atomicrmw_umin_i64_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: amominu.d a0, a1, (a0)
@@ -34979,6 +43717,60 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_acquire:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB236_2
+; RV32I-ZALRSC-NEXT: .LBB236_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 2
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB236_7
+; RV32I-ZALRSC-NEXT: .LBB236_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB236_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB236_5
+; RV32I-ZALRSC-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB236_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB236_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB236_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB236_1
+; RV32I-ZALRSC-NEXT: .LBB236_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umin_i64_acquire:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -35069,6 +43861,21 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_acquire:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB236_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB236_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB236_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB236_3: # in Loop: Header=BB236_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB236_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acquire:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amominu.d.aq a0, a1, (a0)
@@ -35137,6 +43944,60 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_release:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB237_2
+; RV32I-ZALRSC-NEXT: .LBB237_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 3
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB237_7
+; RV32I-ZALRSC-NEXT: .LBB237_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB237_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB237_5
+; RV32I-ZALRSC-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB237_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB237_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB237_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB237_1
+; RV32I-ZALRSC-NEXT: .LBB237_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umin_i64_release:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -35227,6 +44088,21 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_release:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB237_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB237_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB237_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB237_3: # in Loop: Header=BB237_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB237_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umin_i64_release:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amominu.d.rl a0, a1, (a0)
@@ -35295,6 +44171,60 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_acq_rel:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB238_2
+; RV32I-ZALRSC-NEXT: .LBB238_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 4
+; RV32I-ZALRSC-NEXT: li a5, 2
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB238_7
+; RV32I-ZALRSC-NEXT: .LBB238_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB238_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB238_5
+; RV32I-ZALRSC-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB238_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB238_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB238_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB238_1
+; RV32I-ZALRSC-NEXT: .LBB238_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umin_i64_acq_rel:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -35385,6 +44315,21 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_acq_rel:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB238_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aq a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB238_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB238_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB238_3: # in Loop: Header=BB238_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB238_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acq_rel:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amominu.d.aqrl a0, a1, (a0)
@@ -35453,6 +44398,60 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_seq_cst:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB239_2
+; RV32I-ZALRSC-NEXT: .LBB239_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: li a4, 5
+; RV32I-ZALRSC-NEXT: li a5, 5
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB239_7
+; RV32I-ZALRSC-NEXT: .LBB239_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB239_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB239_5
+; RV32I-ZALRSC-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB239_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB239_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB239_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB239_1
+; RV32I-ZALRSC-NEXT: .LBB239_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV32IA-LABEL: atomicrmw_umin_i64_seq_cst:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
@@ -35543,6 +44542,21 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_seq_cst:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB239_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d.aqrl a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB239_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB239_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB239_3: # in Loop: Header=BB239_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d.rl a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB239_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
+;
; RV64IA-WMO-LABEL: atomicrmw_umin_i64_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: amominu.d.aqrl a0, a1, (a0)
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index 7d29ac9..7fe5fa7 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -5,12 +5,16 @@
; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS %s
; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32I-ZALRSC %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s
+; RUN: llc -mtriple=riscv64 -mattr=+zalrsc -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64I-ZALRSC %s
define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
; RV32I-LABEL: atomic_load_i8_unordered:
@@ -30,6 +34,11 @@ define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
; RV32IA-NEXT: lb a0, 0(a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i8_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomic_load_i8_unordered:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -46,6 +55,11 @@ define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: lb a0, 0(a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomic_load_i8_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lb a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
%1 = load atomic i8, ptr %a unordered, align 1
ret i8 %1
}
@@ -68,6 +82,11 @@ define signext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
; RV32IA-NEXT: lh a0, 0(a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i16_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomic_load_i16_unordered:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -84,6 +103,11 @@ define signext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: lh a0, 0(a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomic_load_i16_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lh a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
%1 = load atomic i16, ptr %a unordered, align 2
ret i16 %1
}
@@ -104,6 +128,11 @@ define signext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
; RV32IA-NEXT: lw a0, 0(a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomic_load_i32_unordered:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomic_load_i32_unordered:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -119,6 +148,11 @@ define signext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: lw a0, 0(a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomic_load_i32_unordered:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: lw a0, 0(a0)
+; RV64I-ZALRSC-NEXT: ret
%1 = load atomic i32, ptr %a unordered, align 4
ret i32 %1
}
@@ -159,6 +193,28 @@ define signext i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB3_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xchg_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -192,6 +248,28 @@ define signext i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB3_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw xchg ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -231,6 +309,28 @@ define signext i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB4_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_add_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -264,6 +364,28 @@ define signext i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB4_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw add ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -303,6 +425,28 @@ define signext i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a4, a1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB5_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_sub_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -336,6 +480,28 @@ define signext i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a4, a1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB5_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw sub ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -369,6 +535,27 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_and_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -396,6 +583,27 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw and ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -436,6 +644,29 @@ define signext i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a4, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB7_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_nand_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -470,6 +701,29 @@ define signext i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a4, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB7_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw nand ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -499,6 +753,23 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_or_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -522,6 +793,23 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw or ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -551,6 +839,23 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xor_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -574,6 +879,23 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw xor ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -653,6 +975,37 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB10_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB10_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_max_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -726,6 +1079,37 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB10_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB10_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw max ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -805,6 +1189,37 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: slli a1, a1, 24
+; RV32I-ZALRSC-NEXT: andi a4, a0, 24
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: srai a1, a1, 24
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: xori a4, a4, 24
+; RV32I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB11_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB11_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_min_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -878,6 +1293,37 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: slli a1, a1, 56
+; RV64I-ZALRSC-NEXT: andi a4, a0, 24
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: srai a1, a1, 56
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: xori a4, a4, 56
+; RV64I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB11_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB11_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw min ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -950,6 +1396,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB12_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB12_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umax_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -1016,6 +1488,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB12_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB12_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw umax ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -1088,6 +1586,32 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a3, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a4, a3
+; RV32I-ZALRSC-NEXT: mv a5, a4
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB13_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a4, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a3
+; RV32I-ZALRSC-NEXT: xor a5, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB13_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umin_i8_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -1154,6 +1678,32 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i8_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a3, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a4, a3
+; RV64I-ZALRSC-NEXT: mv a5, a4
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB13_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a4, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a3
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB13_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw umin ptr %a, i8 %b monotonic
ret i8 %1
}
@@ -1194,6 +1744,29 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: mv a5, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB14_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xchg_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -1228,6 +1801,29 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: mv a5, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB14_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw xchg ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -1268,6 +1864,29 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: add a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB15_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_add_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -1302,6 +1921,29 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: add a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB15_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw add ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -1342,6 +1984,29 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: sub a5, a3, a1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB16_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_sub_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -1376,6 +2041,29 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: sub a5, a3, a1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB16_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw sub ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -1410,6 +2098,28 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: not a3, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: or a1, a1, a3
+; RV32I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB17_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_and_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -1438,6 +2148,28 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: not a3, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: or a1, a1, a3
+; RV64I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB17_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw and ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -1479,6 +2211,30 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a5, a3, a1
+; RV32I-ZALRSC-NEXT: not a5, a5
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB18_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_nand_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -1514,6 +2270,30 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a5, a3, a1
+; RV64I-ZALRSC-NEXT: not a5, a5
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB18_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw nand ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -1544,6 +2324,24 @@ define signext i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: or a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB19_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_or_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -1568,6 +2366,24 @@ define signext i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: or a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB19_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw or ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -1598,6 +2414,24 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: srli a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: xor a4, a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB20_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xor_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -1622,6 +2456,24 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: srli a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: xor a4, a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB20_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw xor ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -1703,6 +2555,39 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB21_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB21_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_max_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -1778,6 +2663,39 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB21_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB21_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw max ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -1859,6 +2777,39 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: slli a1, a1, 16
+; RV32I-ZALRSC-NEXT: li a4, 16
+; RV32I-ZALRSC-NEXT: andi a5, a0, 24
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: srai a1, a1, 16
+; RV32I-ZALRSC-NEXT: sll a3, a3, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sub a4, a4, a5
+; RV32I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV32I-ZALRSC-NEXT: and a7, a5, a3
+; RV32I-ZALRSC-NEXT: mv a6, a5
+; RV32I-ZALRSC-NEXT: sll a7, a7, a4
+; RV32I-ZALRSC-NEXT: sra a7, a7, a4
+; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB22_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a1
+; RV32I-ZALRSC-NEXT: and a6, a6, a3
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB22_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_min_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -1934,6 +2885,39 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: slli a1, a1, 48
+; RV64I-ZALRSC-NEXT: li a4, 48
+; RV64I-ZALRSC-NEXT: andi a5, a0, 24
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: srai a1, a1, 48
+; RV64I-ZALRSC-NEXT: sllw a3, a3, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sub a4, a4, a5
+; RV64I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a2)
+; RV64I-ZALRSC-NEXT: and a7, a5, a3
+; RV64I-ZALRSC-NEXT: mv a6, a5
+; RV64I-ZALRSC-NEXT: sll a7, a7, a4
+; RV64I-ZALRSC-NEXT: sra a7, a7, a4
+; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB22_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a1
+; RV64I-ZALRSC-NEXT: and a6, a6, a3
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB22_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw min ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -2011,6 +2995,33 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB23_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB23_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umax_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -2082,6 +3093,33 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB23_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB23_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw umax ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -2159,6 +3197,33 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a3, 16
+; RV32I-ZALRSC-NEXT: addi a3, a3, -1
+; RV32I-ZALRSC-NEXT: sll a4, a3, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a3
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV32I-ZALRSC-NEXT: and a6, a3, a4
+; RV32I-ZALRSC-NEXT: mv a5, a3
+; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a3, a1
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a3, a5
+; RV32I-ZALRSC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB24_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: srl a0, a3, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umin_i16_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -2230,6 +3295,33 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a3, 16
+; RV64I-ZALRSC-NEXT: addi a3, a3, -1
+; RV64I-ZALRSC-NEXT: sllw a4, a3, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a3
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a2)
+; RV64I-ZALRSC-NEXT: and a6, a3, a4
+; RV64I-ZALRSC-NEXT: mv a5, a3
+; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a3, a1
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a3, a5
+; RV64I-ZALRSC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB24_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: srlw a0, a3, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw umin ptr %a, i16 %b monotonic
ret i16 %1
}
@@ -2250,6 +3342,17 @@ define signext i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amoswap.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB25_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xchg_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2265,6 +3368,17 @@ define signext i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoswap.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB25_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw xchg ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2285,6 +3399,17 @@ define signext i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amoadd.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: add a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB26_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_add_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2300,6 +3425,17 @@ define signext i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoadd.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB26_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw add ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2321,6 +3457,17 @@ define signext i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amoadd.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: sub a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB27_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_sub_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2337,6 +3484,17 @@ define signext i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA-NEXT: neg a1, a1
; RV64IA-NEXT: amoadd.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB27_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw sub ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2357,6 +3515,17 @@ define signext i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amoand.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB28_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_and_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2372,6 +3541,17 @@ define signext i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoand.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB28_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw and ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2413,6 +3593,18 @@ define signext i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV32IA-ZACAS-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: and a3, a2, a1
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB29_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_nand_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2449,6 +3641,18 @@ define signext i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA-ZACAS-NEXT: bne a0, a3, .LBB29_1
; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-ZACAS-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB29_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw nand ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2469,6 +3673,17 @@ define signext i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amoor.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: or a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB30_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_or_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2484,6 +3699,17 @@ define signext i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoor.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB30_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw or ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2504,6 +3730,17 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amoxor.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: xor a3, a2, a1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB31_1
+; RV32I-ZALRSC-NEXT: # %bb.2:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xor_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2519,6 +3756,17 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoxor.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB31_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw xor ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2565,6 +3813,21 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amomax.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a3, a1, .LBB32_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB32_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_max_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -2608,6 +3871,22 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomax.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB32_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB32_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw max ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2654,6 +3933,21 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amomin.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bge a1, a3, .LBB33_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB33_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_min_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -2697,6 +3991,22 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomin.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB33_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB33_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw min ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2743,6 +4053,21 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amomaxu.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a3, a1, .LBB34_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB34_3: # in Loop: Header=BB34_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB34_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umax_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -2786,6 +4111,22 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomaxu.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB34_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB34_3: # in Loop: Header=BB34_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB34_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw umax ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2832,6 +4173,21 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32IA-NEXT: amominu.w a0, a1, (a0)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a0)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: bgeu a1, a3, .LBB35_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a1
+; RV32I-ZALRSC-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB35_1
+; RV32I-ZALRSC-NEXT: # %bb.4:
+; RV32I-ZALRSC-NEXT: mv a0, a2
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umin_i32_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
@@ -2875,6 +4231,22 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amominu.w a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: sext.w a2, a1
+; RV64I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB35_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB35_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a1
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw umin ptr %a, i32 %b monotonic
ret i32 %1
}
@@ -2900,6 +4272,16 @@ define signext i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_exchange_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xchg_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2914,6 +4296,17 @@ define signext i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoswap.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB36_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw xchg ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -2939,6 +4332,16 @@ define signext i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_add_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_add_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2953,6 +4356,17 @@ define signext i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoadd.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB37_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw add ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -2978,6 +4392,16 @@ define signext i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_sub_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_sub_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -2993,6 +4417,17 @@ define signext i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA-NEXT: neg a1, a1
; RV64IA-NEXT: amoadd.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB38_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw sub ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -3018,6 +4453,16 @@ define signext i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_and_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_and_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -3032,6 +4477,17 @@ define signext i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoand.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB39_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw and ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -3057,6 +4513,16 @@ define signext i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_nand_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_nand_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -3092,6 +4558,18 @@ define signext i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA-ZACAS-NEXT: bne a0, a3, .LBB40_1
; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-ZACAS-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a2, a1
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB40_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw nand ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -3117,6 +4595,16 @@ define signext i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_or_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_or_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -3131,6 +4619,17 @@ define signext i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoor.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB41_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw or ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -3156,6 +4655,16 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -16
+; RV32I-ZALRSC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: li a3, 0
+; RV32I-ZALRSC-NEXT: call __atomic_fetch_xor_8
+; RV32I-ZALRSC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xor_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -3170,6 +4679,17 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amoxor.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a2, a1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB42_1
+; RV64I-ZALRSC-NEXT: # %bb.2:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw xor ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -3283,6 +4803,60 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB43_2
+; RV32I-ZALRSC-NEXT: .LBB43_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB43_7
+; RV32I-ZALRSC-NEXT: .LBB43_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB43_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB43_5
+; RV32I-ZALRSC-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB43_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB43_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB43_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB43_1
+; RV32I-ZALRSC-NEXT: .LBB43_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_max_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
@@ -3323,6 +4897,21 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomax.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a3, a1, .LBB43_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB43_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw max ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -3436,6 +5025,60 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB44_2
+; RV32I-ZALRSC-NEXT: .LBB44_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB44_7
+; RV32I-ZALRSC-NEXT: .LBB44_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB44_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1
+; RV32I-ZALRSC-NEXT: slt a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB44_5
+; RV32I-ZALRSC-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB44_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB44_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB44_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB44_1
+; RV32I-ZALRSC-NEXT: .LBB44_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_min_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
@@ -3476,6 +5119,21 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomin.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bge a1, a3, .LBB44_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB44_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw min ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -3589,6 +5247,60 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB45_2
+; RV32I-ZALRSC-NEXT: .LBB45_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB45_7
+; RV32I-ZALRSC-NEXT: .LBB45_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB45_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB45_5
+; RV32I-ZALRSC-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB45_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB45_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB45_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB45_1
+; RV32I-ZALRSC-NEXT: .LBB45_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umax_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
@@ -3629,6 +5341,21 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amomaxu.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a3, a1, .LBB45_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB45_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw umax ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -3742,6 +5469,60 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: addi sp, sp, -32
+; RV32I-ZALRSC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-ZALRSC-NEXT: mv s0, a2
+; RV32I-ZALRSC-NEXT: mv s1, a0
+; RV32I-ZALRSC-NEXT: lw a4, 0(a0)
+; RV32I-ZALRSC-NEXT: lw a5, 4(a0)
+; RV32I-ZALRSC-NEXT: mv s2, a1
+; RV32I-ZALRSC-NEXT: j .LBB46_2
+; RV32I-ZALRSC-NEXT: .LBB46_1: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1
+; RV32I-ZALRSC-NEXT: sw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: sw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: addi a1, sp, 8
+; RV32I-ZALRSC-NEXT: mv a0, s1
+; RV32I-ZALRSC-NEXT: li a4, 0
+; RV32I-ZALRSC-NEXT: li a5, 0
+; RV32I-ZALRSC-NEXT: call __atomic_compare_exchange_8
+; RV32I-ZALRSC-NEXT: lw a4, 8(sp)
+; RV32I-ZALRSC-NEXT: lw a5, 12(sp)
+; RV32I-ZALRSC-NEXT: bnez a0, .LBB46_7
+; RV32I-ZALRSC-NEXT: .LBB46_2: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: beq a5, s0, .LBB46_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s0, a5
+; RV32I-ZALRSC-NEXT: j .LBB46_5
+; RV32I-ZALRSC-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1
+; RV32I-ZALRSC-NEXT: sltu a0, s2, a4
+; RV32I-ZALRSC-NEXT: .LBB46_5: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, a4
+; RV32I-ZALRSC-NEXT: mv a3, a5
+; RV32I-ZALRSC-NEXT: beqz a0, .LBB46_1
+; RV32I-ZALRSC-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB46_2 Depth=1
+; RV32I-ZALRSC-NEXT: mv a2, s2
+; RV32I-ZALRSC-NEXT: mv a3, s0
+; RV32I-ZALRSC-NEXT: j .LBB46_1
+; RV32I-ZALRSC-NEXT: .LBB46_7: # %atomicrmw.end
+; RV32I-ZALRSC-NEXT: mv a0, a4
+; RV32I-ZALRSC-NEXT: mv a1, a5
+; RV32I-ZALRSC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-ZALRSC-NEXT: addi sp, sp, 32
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umin_i64_monotonic:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
@@ -3782,6 +5563,21 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64IA: # %bb.0:
; RV64IA-NEXT: amominu.d a0, a1, (a0)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i64_monotonic:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.d a2, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: bgeu a1, a3, .LBB46_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.d a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB46_1
+; RV64I-ZALRSC-NEXT: # %bb.4:
+; RV64I-ZALRSC-NEXT: mv a0, a2
+; RV64I-ZALRSC-NEXT: ret
%1 = atomicrmw umin ptr %a, i64 %b monotonic
ret i64 %1
}
@@ -3827,6 +5623,32 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp
; RV32IA-NEXT: srai a0, a0, 24
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a3, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a4, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: zext.b a2, a2
+; RV32I-ZALRSC-NEXT: sll a4, a4, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a5, (a3)
+; RV32I-ZALRSC-NEXT: and a6, a5, a4
+; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB47_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a5, a2
+; RV32I-ZALRSC-NEXT: and a6, a6, a4
+; RV32I-ZALRSC-NEXT: xor a6, a5, a6
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB47_1
+; RV32I-ZALRSC-NEXT: .LBB47_3:
+; RV32I-ZALRSC-NEXT: srl a0, a5, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 24
+; RV32I-ZALRSC-NEXT: srai a0, a0, 24
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic_val0:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -3866,6 +5688,32 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp
; RV64IA-NEXT: slli a0, a0, 56
; RV64IA-NEXT: srai a0, a0, 56
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a3, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a4, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: zext.b a2, a2
+; RV64I-ZALRSC-NEXT: sllw a4, a4, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a5, (a3)
+; RV64I-ZALRSC-NEXT: and a6, a5, a4
+; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB47_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a5, a2
+; RV64I-ZALRSC-NEXT: and a6, a6, a4
+; RV64I-ZALRSC-NEXT: xor a6, a5, a6
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB47_1
+; RV64I-ZALRSC-NEXT: .LBB47_3:
+; RV64I-ZALRSC-NEXT: srlw a0, a5, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 56
+; RV64I-ZALRSC-NEXT: srai a0, a0, 56
+; RV64I-ZALRSC-NEXT: ret
%1 = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
%2 = extractvalue { i8, i1 } %1, 0
ret i8 %2
@@ -3911,6 +5759,32 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig
; RV32IA-NEXT: seqz a0, a1
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a3, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: li a4, 255
+; RV32I-ZALRSC-NEXT: zext.b a1, a1
+; RV32I-ZALRSC-NEXT: zext.b a2, a2
+; RV32I-ZALRSC-NEXT: sll a4, a4, a0
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sll a0, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a3)
+; RV32I-ZALRSC-NEXT: and a5, a2, a4
+; RV32I-ZALRSC-NEXT: bne a5, a1, .LBB48_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a5, a2, a0
+; RV32I-ZALRSC-NEXT: and a5, a5, a4
+; RV32I-ZALRSC-NEXT: xor a5, a2, a5
+; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3)
+; RV32I-ZALRSC-NEXT: bnez a5, .LBB48_1
+; RV32I-ZALRSC-NEXT: .LBB48_3:
+; RV32I-ZALRSC-NEXT: and a2, a2, a4
+; RV32I-ZALRSC-NEXT: xor a1, a1, a2
+; RV32I-ZALRSC-NEXT: seqz a0, a1
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic_val1:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -3949,6 +5823,32 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig
; RV64IA-NEXT: xor a1, a1, a2
; RV64IA-NEXT: seqz a0, a1
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a3, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: li a4, 255
+; RV64I-ZALRSC-NEXT: zext.b a1, a1
+; RV64I-ZALRSC-NEXT: zext.b a2, a2
+; RV64I-ZALRSC-NEXT: sllw a4, a4, a0
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sllw a0, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a3)
+; RV64I-ZALRSC-NEXT: and a5, a2, a4
+; RV64I-ZALRSC-NEXT: bne a5, a1, .LBB48_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a5, a2, a0
+; RV64I-ZALRSC-NEXT: and a5, a5, a4
+; RV64I-ZALRSC-NEXT: xor a5, a2, a5
+; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3)
+; RV64I-ZALRSC-NEXT: bnez a5, .LBB48_1
+; RV64I-ZALRSC-NEXT: .LBB48_3:
+; RV64I-ZALRSC-NEXT: and a2, a2, a4
+; RV64I-ZALRSC-NEXT: xor a1, a1, a2
+; RV64I-ZALRSC-NEXT: seqz a0, a1
+; RV64I-ZALRSC-NEXT: ret
%1 = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
%2 = extractvalue { i8, i1 } %1, 1
ret i1 %2
@@ -3996,6 +5896,33 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext %
; RV32IA-NEXT: srai a0, a0, 16
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val0:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a3, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a4, 16
+; RV32I-ZALRSC-NEXT: addi a4, a4, -1
+; RV32I-ZALRSC-NEXT: sll a5, a4, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a4
+; RV32I-ZALRSC-NEXT: and a2, a2, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sll a2, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a4, (a3)
+; RV32I-ZALRSC-NEXT: and a6, a4, a5
+; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB49_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a6, a4, a2
+; RV32I-ZALRSC-NEXT: and a6, a6, a5
+; RV32I-ZALRSC-NEXT: xor a6, a4, a6
+; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3)
+; RV32I-ZALRSC-NEXT: bnez a6, .LBB49_1
+; RV32I-ZALRSC-NEXT: .LBB49_3:
+; RV32I-ZALRSC-NEXT: srl a0, a4, a0
+; RV32I-ZALRSC-NEXT: slli a0, a0, 16
+; RV32I-ZALRSC-NEXT: srai a0, a0, 16
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic_val0:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -4036,6 +5963,33 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext %
; RV64IA-NEXT: slli a0, a0, 48
; RV64IA-NEXT: srai a0, a0, 48
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val0:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a3, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a4, 16
+; RV64I-ZALRSC-NEXT: addi a4, a4, -1
+; RV64I-ZALRSC-NEXT: sllw a5, a4, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a4
+; RV64I-ZALRSC-NEXT: and a2, a2, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sllw a2, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a4, (a3)
+; RV64I-ZALRSC-NEXT: and a6, a4, a5
+; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB49_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a6, a4, a2
+; RV64I-ZALRSC-NEXT: and a6, a6, a5
+; RV64I-ZALRSC-NEXT: xor a6, a4, a6
+; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3)
+; RV64I-ZALRSC-NEXT: bnez a6, .LBB49_1
+; RV64I-ZALRSC-NEXT: .LBB49_3:
+; RV64I-ZALRSC-NEXT: srlw a0, a4, a0
+; RV64I-ZALRSC-NEXT: slli a0, a0, 48
+; RV64I-ZALRSC-NEXT: srai a0, a0, 48
+; RV64I-ZALRSC-NEXT: ret
%1 = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
%2 = extractvalue { i16, i1 } %1, 0
ret i16 %2
@@ -4082,6 +6036,33 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16
; RV32IA-NEXT: seqz a0, a1
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a3, a0, -4
+; RV32I-ZALRSC-NEXT: slli a0, a0, 3
+; RV32I-ZALRSC-NEXT: lui a4, 16
+; RV32I-ZALRSC-NEXT: addi a4, a4, -1
+; RV32I-ZALRSC-NEXT: sll a5, a4, a0
+; RV32I-ZALRSC-NEXT: and a1, a1, a4
+; RV32I-ZALRSC-NEXT: and a2, a2, a4
+; RV32I-ZALRSC-NEXT: sll a1, a1, a0
+; RV32I-ZALRSC-NEXT: sll a0, a2, a0
+; RV32I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a2, (a3)
+; RV32I-ZALRSC-NEXT: and a4, a2, a5
+; RV32I-ZALRSC-NEXT: bne a4, a1, .LBB50_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV32I-ZALRSC-NEXT: xor a4, a2, a0
+; RV32I-ZALRSC-NEXT: and a4, a4, a5
+; RV32I-ZALRSC-NEXT: xor a4, a2, a4
+; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a3)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB50_1
+; RV32I-ZALRSC-NEXT: .LBB50_3:
+; RV32I-ZALRSC-NEXT: and a2, a2, a5
+; RV32I-ZALRSC-NEXT: xor a1, a1, a2
+; RV32I-ZALRSC-NEXT: seqz a0, a1
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic_val1:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -4121,6 +6102,33 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16
; RV64IA-NEXT: xor a1, a1, a2
; RV64IA-NEXT: seqz a0, a1
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a3, a0, -4
+; RV64I-ZALRSC-NEXT: slli a0, a0, 3
+; RV64I-ZALRSC-NEXT: lui a4, 16
+; RV64I-ZALRSC-NEXT: addi a4, a4, -1
+; RV64I-ZALRSC-NEXT: sllw a5, a4, a0
+; RV64I-ZALRSC-NEXT: and a1, a1, a4
+; RV64I-ZALRSC-NEXT: and a2, a2, a4
+; RV64I-ZALRSC-NEXT: sllw a1, a1, a0
+; RV64I-ZALRSC-NEXT: sllw a0, a2, a0
+; RV64I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a2, (a3)
+; RV64I-ZALRSC-NEXT: and a4, a2, a5
+; RV64I-ZALRSC-NEXT: bne a4, a1, .LBB50_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV64I-ZALRSC-NEXT: xor a4, a2, a0
+; RV64I-ZALRSC-NEXT: and a4, a4, a5
+; RV64I-ZALRSC-NEXT: xor a4, a2, a4
+; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a3)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB50_1
+; RV64I-ZALRSC-NEXT: .LBB50_3:
+; RV64I-ZALRSC-NEXT: and a2, a2, a5
+; RV64I-ZALRSC-NEXT: xor a1, a1, a2
+; RV64I-ZALRSC-NEXT: seqz a0, a1
+; RV64I-ZALRSC-NEXT: ret
%1 = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
%2 = extractvalue { i16, i1 } %1, 1
ret i1 %2
@@ -4159,6 +6167,18 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext %
; RV32IA-ZACAS-NEXT: mv a0, a1
; RV32IA-ZACAS-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val0:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a0)
+; RV32I-ZALRSC-NEXT: bne a3, a1, .LBB51_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a4, a2, (a0)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB51_1
+; RV32I-ZALRSC-NEXT: .LBB51_3:
+; RV32I-ZALRSC-NEXT: mv a0, a3
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val0:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -4190,6 +6210,18 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext %
; RV64IA-ZACAS-NEXT: amocas.w a1, a2, (a0)
; RV64IA-ZACAS-NEXT: mv a0, a1
; RV64IA-ZACAS-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val0:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a0)
+; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB51_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a4, a2, (a0)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB51_1
+; RV64I-ZALRSC-NEXT: .LBB51_3:
+; RV64I-ZALRSC-NEXT: mv a0, a3
+; RV64I-ZALRSC-NEXT: ret
%1 = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
%2 = extractvalue { i32, i1 } %1, 0
ret i32 %2
@@ -4230,6 +6262,19 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32
; RV32IA-ZACAS-NEXT: seqz a0, a1
; RV32IA-ZACAS-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val1:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a3, (a0)
+; RV32I-ZALRSC-NEXT: bne a3, a1, .LBB52_3
+; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a4, a2, (a0)
+; RV32I-ZALRSC-NEXT: bnez a4, .LBB52_1
+; RV32I-ZALRSC-NEXT: .LBB52_3:
+; RV32I-ZALRSC-NEXT: xor a1, a3, a1
+; RV32I-ZALRSC-NEXT: seqz a0, a1
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val1:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
@@ -4263,6 +6308,19 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32
; RV64IA-ZACAS-NEXT: xor a1, a3, a1
; RV64IA-ZACAS-NEXT: seqz a0, a1
; RV64IA-ZACAS-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_monotonic_val1:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a3, (a0)
+; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB52_3
+; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a4, a2, (a0)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB52_1
+; RV64I-ZALRSC-NEXT: .LBB52_3:
+; RV64I-ZALRSC-NEXT: xor a1, a3, a1
+; RV64I-ZALRSC-NEXT: seqz a0, a1
+; RV64I-ZALRSC-NEXT: ret
%1 = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
%2 = extractvalue { i32, i1 } %1, 1
ret i1 %2
@@ -4304,6 +6362,27 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB53_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB53_3: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB53_3
+; RV32I-ZALRSC-NEXT: # %bb.4: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB53_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xchg_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 1
@@ -4339,6 +6418,28 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-NEXT: li a2, 1
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a1, 1
+; RV64I-ZALRSC-NEXT: beqz a1, .LBB53_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB53_3: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB53_3
+; RV64I-ZALRSC-NEXT: # %bb.4: # %then
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB53_2: # %else
+; RV64I-ZALRSC-NEXT: lw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: sw a2, 0(a0)
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -4391,6 +6492,27 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB54_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB54_3: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: add a3, a0, a2
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB54_3
+; RV32I-ZALRSC-NEXT: # %bb.4: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB54_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: addi a2, a0, 1
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_add_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 1
@@ -4426,6 +6548,28 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-NEXT: addi a2, a0, 1
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_add_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a1, 1
+; RV64I-ZALRSC-NEXT: beqz a1, .LBB54_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB54_3: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: add a3, a1, a2
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB54_3
+; RV64I-ZALRSC-NEXT: # %bb.4: # %then
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB54_2: # %else
+; RV64I-ZALRSC-NEXT: lw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: addi a2, a1, 1
+; RV64I-ZALRSC-NEXT: sw a2, 0(a0)
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -4479,6 +6623,27 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB55_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB55_3: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: sub a3, a0, a2
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB55_3
+; RV32I-ZALRSC-NEXT: # %bb.4: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB55_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: addi a2, a0, -1
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_sub_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 1
@@ -4514,6 +6679,28 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-NEXT: addi a2, a0, -1
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_sub_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a1, 1
+; RV64I-ZALRSC-NEXT: beqz a1, .LBB55_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB55_3: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: sub a3, a1, a2
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB55_3
+; RV64I-ZALRSC-NEXT: # %bb.4: # %then
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB55_2: # %else
+; RV64I-ZALRSC-NEXT: lw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: addi a2, a1, -1
+; RV64I-ZALRSC-NEXT: sw a2, 0(a0)
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -4567,6 +6754,27 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB56_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB56_3: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: and a3, a0, a2
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB56_3
+; RV32I-ZALRSC-NEXT: # %bb.4: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB56_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: andi a2, a0, 1
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_and_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 1
@@ -4602,6 +6810,28 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-NEXT: andi a2, a0, 1
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_and_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a1, 1
+; RV64I-ZALRSC-NEXT: beqz a1, .LBB56_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB56_3: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: and a3, a1, a2
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB56_3
+; RV64I-ZALRSC-NEXT: # %bb.4: # %then
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB56_2: # %else
+; RV64I-ZALRSC-NEXT: lw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: andi a2, a1, 1
+; RV64I-ZALRSC-NEXT: sw a2, 0(a0)
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -4685,6 +6915,28 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-ZACAS-NEXT: mv a0, a1
; RV32IA-ZACAS-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB57_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB57_3: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: and a3, a0, a2
+; RV32I-ZALRSC-NEXT: not a3, a3
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB57_3
+; RV32I-ZALRSC-NEXT: # %bb.4: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB57_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: andi a2, a0, 1
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_nand_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 1
@@ -4750,6 +7002,28 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-ZACAS-NEXT: sw a2, 0(a0)
; RV64IA-ZACAS-NEXT: mv a0, a1
; RV64IA-ZACAS-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_nand_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a1, 1
+; RV64I-ZALRSC-NEXT: mv a1, a0
+; RV64I-ZALRSC-NEXT: beqz a2, .LBB57_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB57_3: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV64I-ZALRSC-NEXT: and a3, a0, a2
+; RV64I-ZALRSC-NEXT: not a3, a3
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB57_3
+; RV64I-ZALRSC-NEXT: # %bb.4: # %then
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB57_2: # %else
+; RV64I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV64I-ZALRSC-NEXT: andi a2, a0, 1
+; RV64I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -4803,6 +7077,27 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB58_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB58_3: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: or a3, a0, a2
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB58_3
+; RV32I-ZALRSC-NEXT: # %bb.4: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB58_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: ori a2, a0, 1
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_or_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 1
@@ -4838,6 +7133,28 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV64IA-NEXT: ori a2, a0, 1
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_or_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a1, 1
+; RV64I-ZALRSC-NEXT: beqz a1, .LBB58_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB58_3: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: or a3, a1, a2
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB58_3
+; RV64I-ZALRSC-NEXT: # %bb.4: # %then
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB58_2: # %else
+; RV64I-ZALRSC-NEXT: lw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: ori a2, a1, 1
+; RV64I-ZALRSC-NEXT: sw a2, 0(a0)
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -4891,6 +7208,27 @@ define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB59_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB59_3: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: xor a3, a0, a2
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB59_3
+; RV32I-ZALRSC-NEXT: # %bb.4: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB59_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: xori a2, a0, 1
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_xor_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 1
@@ -4926,6 +7264,28 @@ define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-NEXT: xori a2, a0, 1
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_xor_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a1, 1
+; RV64I-ZALRSC-NEXT: beqz a1, .LBB59_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB59_3: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: xor a3, a1, a2
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB59_3
+; RV64I-ZALRSC-NEXT: # %bb.4: # %then
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB59_2: # %else
+; RV64I-ZALRSC-NEXT: lw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: xori a2, a1, 1
+; RV64I-ZALRSC-NEXT: sw a2, 0(a0)
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -5007,6 +7367,37 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB60_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB60_5: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: mv a3, a0
+; RV32I-ZALRSC-NEXT: bge a3, a2, .LBB60_7
+; RV32I-ZALRSC-NEXT: # %bb.6: # %then
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: .LBB60_7: # %then
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB60_5
+; RV32I-ZALRSC-NEXT: # %bb.8: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB60_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: mv a2, a0
+; RV32I-ZALRSC-NEXT: bgtz a0, .LBB60_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %else
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB60_4: # %else
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_max_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
@@ -5070,6 +7461,37 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-NEXT: .LBB60_4: # %else
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_max_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a1, 1
+; RV64I-ZALRSC-NEXT: mv a1, a0
+; RV64I-ZALRSC-NEXT: beqz a2, .LBB60_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB60_5: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV64I-ZALRSC-NEXT: mv a3, a0
+; RV64I-ZALRSC-NEXT: bge a3, a2, .LBB60_7
+; RV64I-ZALRSC-NEXT: # %bb.6: # %then
+; RV64I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB60_7: # %then
+; RV64I-ZALRSC-NEXT: # in Loop: Header=BB60_5 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB60_5
+; RV64I-ZALRSC-NEXT: # %bb.8: # %then
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB60_2: # %else
+; RV64I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV64I-ZALRSC-NEXT: mv a2, a0
+; RV64I-ZALRSC-NEXT: bgtz a0, .LBB60_4
+; RV64I-ZALRSC-NEXT: # %bb.3: # %else
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB60_4: # %else
+; RV64I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -5155,6 +7577,37 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB61_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB61_5: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: mv a3, a0
+; RV32I-ZALRSC-NEXT: bge a2, a3, .LBB61_7
+; RV32I-ZALRSC-NEXT: # %bb.6: # %then
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: .LBB61_7: # %then
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB61_5
+; RV32I-ZALRSC-NEXT: # %bb.8: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB61_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: mv a2, a0
+; RV32I-ZALRSC-NEXT: blez a0, .LBB61_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %else
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB61_4: # %else
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_min_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
@@ -5220,6 +7673,37 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-NEXT: .LBB61_4: # %else
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_min_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a1, 1
+; RV64I-ZALRSC-NEXT: mv a1, a0
+; RV64I-ZALRSC-NEXT: beqz a2, .LBB61_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB61_5: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV64I-ZALRSC-NEXT: mv a3, a0
+; RV64I-ZALRSC-NEXT: bge a2, a3, .LBB61_7
+; RV64I-ZALRSC-NEXT: # %bb.6: # %then
+; RV64I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB61_7: # %then
+; RV64I-ZALRSC-NEXT: # in Loop: Header=BB61_5 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB61_5
+; RV64I-ZALRSC-NEXT: # %bb.8: # %then
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB61_2: # %else
+; RV64I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV64I-ZALRSC-NEXT: mv a2, a0
+; RV64I-ZALRSC-NEXT: blez a0, .LBB61_4
+; RV64I-ZALRSC-NEXT: # %bb.3: # %else
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB61_4: # %else
+; RV64I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -5290,6 +7774,34 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB62_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB62_3: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: mv a3, a0
+; RV32I-ZALRSC-NEXT: bgeu a3, a2, .LBB62_5
+; RV32I-ZALRSC-NEXT: # %bb.4: # %then
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: .LBB62_5: # %then
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB62_3
+; RV32I-ZALRSC-NEXT: # %bb.6: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB62_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: seqz a2, a0
+; RV32I-ZALRSC-NEXT: add a2, a0, a2
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umax_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
@@ -5347,6 +7859,35 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-NEXT: add a2, a0, a2
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umax_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a1, a1, 1
+; RV64I-ZALRSC-NEXT: beqz a1, .LBB62_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB62_3: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a1, (a0)
+; RV64I-ZALRSC-NEXT: mv a3, a1
+; RV64I-ZALRSC-NEXT: bgeu a3, a2, .LBB62_5
+; RV64I-ZALRSC-NEXT: # %bb.4: # %then
+; RV64I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB62_5: # %then
+; RV64I-ZALRSC-NEXT: # in Loop: Header=BB62_3 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a0)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB62_3
+; RV64I-ZALRSC-NEXT: # %bb.6: # %then
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB62_2: # %else
+; RV64I-ZALRSC-NEXT: lw a1, 0(a0)
+; RV64I-ZALRSC-NEXT: seqz a2, a1
+; RV64I-ZALRSC-NEXT: add a2, a1, a2
+; RV64I-ZALRSC-NEXT: sw a2, 0(a0)
+; RV64I-ZALRSC-NEXT: sext.w a0, a1
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -5434,6 +7975,38 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: andi a2, a1, 1
+; RV32I-ZALRSC-NEXT: mv a1, a0
+; RV32I-ZALRSC-NEXT: beqz a2, .LBB63_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB63_5: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV32I-ZALRSC-NEXT: mv a3, a0
+; RV32I-ZALRSC-NEXT: bgeu a2, a3, .LBB63_7
+; RV32I-ZALRSC-NEXT: # %bb.6: # %then
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1
+; RV32I-ZALRSC-NEXT: mv a3, a2
+; RV32I-ZALRSC-NEXT: .LBB63_7: # %then
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB63_5
+; RV32I-ZALRSC-NEXT: # %bb.8: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB63_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV32I-ZALRSC-NEXT: li a3, 1
+; RV32I-ZALRSC-NEXT: mv a2, a0
+; RV32I-ZALRSC-NEXT: bltu a0, a3, .LBB63_4
+; RV32I-ZALRSC-NEXT: # %bb.3: # %else
+; RV32I-ZALRSC-NEXT: li a2, 1
+; RV32I-ZALRSC-NEXT: .LBB63_4: # %else
+; RV32I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
@@ -5501,6 +8074,38 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA-NEXT: .LBB63_4: # %else
; RV64IA-NEXT: sw a2, 0(a1)
; RV64IA-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: andi a2, a1, 1
+; RV64I-ZALRSC-NEXT: mv a1, a0
+; RV64I-ZALRSC-NEXT: beqz a2, .LBB63_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB63_5: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w a0, (a1)
+; RV64I-ZALRSC-NEXT: mv a3, a0
+; RV64I-ZALRSC-NEXT: bgeu a2, a3, .LBB63_7
+; RV64I-ZALRSC-NEXT: # %bb.6: # %then
+; RV64I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1
+; RV64I-ZALRSC-NEXT: mv a3, a2
+; RV64I-ZALRSC-NEXT: .LBB63_7: # %then
+; RV64I-ZALRSC-NEXT: # in Loop: Header=BB63_5 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w a3, a3, (a1)
+; RV64I-ZALRSC-NEXT: bnez a3, .LBB63_5
+; RV64I-ZALRSC-NEXT: # %bb.8: # %then
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB63_2: # %else
+; RV64I-ZALRSC-NEXT: lw a0, 0(a1)
+; RV64I-ZALRSC-NEXT: li a3, 1
+; RV64I-ZALRSC-NEXT: mv a2, a0
+; RV64I-ZALRSC-NEXT: bltu a0, a3, .LBB63_4
+; RV64I-ZALRSC-NEXT: # %bb.3: # %else
+; RV64I-ZALRSC-NEXT: li a2, 1
+; RV64I-ZALRSC-NEXT: .LBB63_4: # %else
+; RV64I-ZALRSC-NEXT: sw a2, 0(a1)
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
@@ -5570,6 +8175,25 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3
; RV32IA-ZACAS-NEXT: lw a0, 0(a0)
; RV32IA-ZACAS-NEXT: ret
;
+; RV32I-ZALRSC-LABEL: cmpxchg_i32_monotonic_crossbb:
+; RV32I-ZALRSC: # %bb.0:
+; RV32I-ZALRSC-NEXT: mv a4, a0
+; RV32I-ZALRSC-NEXT: beqz a3, .LBB64_2
+; RV32I-ZALRSC-NEXT: # %bb.1: # %then
+; RV32I-ZALRSC-NEXT: .LBB64_3: # %then
+; RV32I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-ZALRSC-NEXT: lr.w.aqrl a0, (a4)
+; RV32I-ZALRSC-NEXT: bne a0, a1, .LBB64_5
+; RV32I-ZALRSC-NEXT: # %bb.4: # %then
+; RV32I-ZALRSC-NEXT: # in Loop: Header=BB64_3 Depth=1
+; RV32I-ZALRSC-NEXT: sc.w.rl a3, a2, (a4)
+; RV32I-ZALRSC-NEXT: bnez a3, .LBB64_3
+; RV32I-ZALRSC-NEXT: .LBB64_5: # %then
+; RV32I-ZALRSC-NEXT: ret
+; RV32I-ZALRSC-NEXT: .LBB64_2: # %else
+; RV32I-ZALRSC-NEXT: lw a0, 0(a4)
+; RV32I-ZALRSC-NEXT: ret
+;
; RV64I-LABEL: cmpxchg_i32_monotonic_crossbb:
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a3, .LBB64_2
@@ -5620,6 +8244,26 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3
; RV64IA-ZACAS-NEXT: .LBB64_2: # %else
; RV64IA-ZACAS-NEXT: lw a0, 0(a0)
; RV64IA-ZACAS-NEXT: ret
+;
+; RV64I-ZALRSC-LABEL: cmpxchg_i32_monotonic_crossbb:
+; RV64I-ZALRSC: # %bb.0:
+; RV64I-ZALRSC-NEXT: beqz a3, .LBB64_2
+; RV64I-ZALRSC-NEXT: # %bb.1: # %then
+; RV64I-ZALRSC-NEXT: .LBB64_3: # %then
+; RV64I-ZALRSC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0)
+; RV64I-ZALRSC-NEXT: bne a3, a1, .LBB64_5
+; RV64I-ZALRSC-NEXT: # %bb.4: # %then
+; RV64I-ZALRSC-NEXT: # in Loop: Header=BB64_3 Depth=1
+; RV64I-ZALRSC-NEXT: sc.w.rl a4, a2, (a0)
+; RV64I-ZALRSC-NEXT: bnez a4, .LBB64_3
+; RV64I-ZALRSC-NEXT: .LBB64_5: # %then
+; RV64I-ZALRSC-NEXT: sext.w a0, a3
+; RV64I-ZALRSC-NEXT: ret
+; RV64I-ZALRSC-NEXT: .LBB64_2: # %else
+; RV64I-ZALRSC-NEXT: lw a3, 0(a0)
+; RV64I-ZALRSC-NEXT: sext.w a0, a3
+; RV64I-ZALRSC-NEXT: ret
br i1 %c, label %then, label %else
then:
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll
index cce1eda..1aee688 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll
@@ -1,4 +1,5 @@
; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %}
%"__cblayout_$Globals" = type <{ i32 }>
@@ -9,7 +10,6 @@
; CHECK: OpCapability Shader
; CHECK: OpCapability StorageTexelBufferArrayDynamicIndexingEXT
-
define void @main() local_unnamed_addr #0 {
entry:
%"$Globals.cb_h.i.i" = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 4, 0), 2, 0) @"llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_$Globalss_4_0t_2_0t"(i32 1, i32 0, i32 1, i32 0, ptr nonnull @"$Globals.str")
@@ -19,4 +19,8 @@ entry:
%2 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 98)
store i32 99, ptr addrspace(11) %2, align 4
ret void
-} \ No newline at end of file
+}
+
+!hlsl.cbs = !{!0}
+
+!0 = !{ptr @"$Globals.cb", ptr addrspace(12) @i}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll
index da69a2f..163fc9d 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll
@@ -1,4 +1,5 @@
; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %}
%"__cblayout_$Globals" = type <{ i32 }>
@@ -19,3 +20,7 @@ entry:
store i32 98, ptr addrspace(11) %2, align 4
ret void
}
+
+!hlsl.cbs = !{!0}
+
+!0 = !{ptr @"$Globals.cb", ptr addrspace(12) @i}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll
new file mode 100644
index 0000000..7c44b6d
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/TypedBufferLoad.ll
@@ -0,0 +1,43 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %}
+
+; When accessing read-only `Buffer` types, SPIR-V should use `OpImageFetch` instead of `OpImageRead`.
+; https://github.com/llvm/llvm-project/issues/162891
+
+; CHECK-DAG: OpCapability SampledBuffer
+; CHECK-DAG: OpCapability ImageBuffer
+; CHECK-DAG: [[TypeInt:%[0-9]+]] = OpTypeInt 32 0
+; CHECK-DAG: [[TypeImageBuffer:%[0-9]+]] = OpTypeImage [[TypeInt]] Buffer 2 0 0 1 Unknown
+; CHECK-DAG: [[TypePtrImageBuffer:%[0-9]+]] = OpTypePointer UniformConstant [[TypeImageBuffer]]
+; CHECK-DAG: [[TypeVector:%[0-9]+]] = OpTypeVector [[TypeInt]] 4
+; CHECK-DAG: [[Index:%[0-9]+]] = OpConstant [[TypeInt]] 98
+; CHECK-DAG: [[Variable:%[0-9]+]] = OpVariable [[TypePtrImageBuffer]] UniformConstant
+@.str = private unnamed_addr constant [7 x i8] c"rwbuff\00", align 1
+@.str.2 = private unnamed_addr constant [5 x i8] c"buff\00", align 1
+@.str.4 = private unnamed_addr constant [8 x i8] c"unknown\00", align 1
+
+define void @main() local_unnamed_addr #0 {
+ %1 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_2_33t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+ %2 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_1_0t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @.str.2)
+ %3 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 0, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_0_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @.str.4)
+ %4 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_1_0t(target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) %2, i32 98)
+; CHECK: [[Load:%[0-9]+]] = OpLoad [[TypeImageBuffer]] [[Variable]]
+; CHECK: [[ImageFetch:%[0-9]+]] = OpImageFetch [[TypeVector]] [[Load]] [[Index]]
+; CHECK: {{.*}} = OpCompositeExtract [[TypeInt]] [[ImageFetch]] 0
+ %5 = load i32, ptr addrspace(11) %4, align 4
+ %6 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 99)
+ store i32 %5, ptr addrspace(11) %6, align 4
+ %7 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 96)
+; CHECK: {{%[0-9]+}} = OpLoad {{.*}}
+; CHECK: {{%[0-9]+}} = OpImageRead {{.*}}
+ %8 = load i32, ptr addrspace(11) %7, align 4
+ %9 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 97)
+ store i32 %8, ptr addrspace(11) %9, align 4
+ %10 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_0_0t(target("spirv.Image", i32, 5, 2, 0, 0, 0, 0) %3, i32 94)
+; CHECK: {{%[0-9]+}} = OpLoad {{.*}}
+; CHECK: {{%[0-9]+}} = OpImageRead {{.*}}
+ %11 = load i32, ptr addrspace(11) %10, align 4
+ %12 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 95)
+ store i32 %11, ptr addrspace(11) %12, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
new file mode 100644
index 0000000..13149d7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
@@ -0,0 +1,3021 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=CHECK,AVX512
+
+;
+; CTPOP
+;
+
+define i32 @test_ctpop_i128(i128 %a0) nounwind {
+; CHECK-LABEL: test_ctpop_i128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: popcntq %rsi, %rcx
+; CHECK-NEXT: popcntq %rdi, %rax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+ %cnt = call i128 @llvm.ctpop.i128(i128 %a0)
+ %res = trunc i128 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_ctpop_i128(ptr %p0) nounwind {
+; CHECK-LABEL: load_ctpop_i128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: popcntq 8(%rdi), %rcx
+; CHECK-NEXT: popcntq (%rdi), %rax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+ %a0 = load i128, ptr %p0
+ %cnt = call i128 @llvm.ctpop.i128(i128 %a0)
+ %res = trunc i128 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @test_ctpop_i256(i256 %a0) nounwind {
+; CHECK-LABEL: test_ctpop_i256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: popcntq %rcx, %rax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: popcntq %rdx, %rcx
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: popcntq %rsi, %rdx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: popcntq %rdi, %rax
+; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+ %cnt = call i256 @llvm.ctpop.i256(i256 %a0)
+ %res = trunc i256 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_ctpop_i256(ptr %p0) nounwind {
+; SSE-LABEL: load_ctpop_i256:
+; SSE: # %bb.0:
+; SSE-NEXT: popcntq 24(%rdi), %rcx
+; SSE-NEXT: popcntq 16(%rdi), %rdx
+; SSE-NEXT: popcntq 8(%rdi), %rsi
+; SSE-NEXT: popcntq (%rdi), %rax
+; SSE-NEXT: addl %ecx, %edx
+; SSE-NEXT: addl %esi, %eax
+; SSE-NEXT: addl %edx, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_ctpop_i256:
+; AVX2: # %bb.0:
+; AVX2-NEXT: popcntq 24(%rdi), %rax
+; AVX2-NEXT: popcntq 16(%rdi), %rcx
+; AVX2-NEXT: addl %eax, %ecx
+; AVX2-NEXT: popcntq 8(%rdi), %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq (%rdi), %rax
+; AVX2-NEXT: addl %edx, %eax
+; AVX2-NEXT: addl %ecx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_ctpop_i256:
+; AVX512: # %bb.0:
+; AVX512-NEXT: popcntq 24(%rdi), %rax
+; AVX512-NEXT: popcntq 16(%rdi), %rcx
+; AVX512-NEXT: addl %eax, %ecx
+; AVX512-NEXT: popcntq 8(%rdi), %rdx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq (%rdi), %rax
+; AVX512-NEXT: addl %edx, %eax
+; AVX512-NEXT: addl %ecx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %a0 = load i256, ptr %p0
+ %cnt = call i256 @llvm.ctpop.i256(i256 %a0)
+ %res = trunc i256 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @test_ctpop_i512(i512 %a0) nounwind {
+; CHECK-LABEL: test_ctpop_i512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: popcntq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT: addl %eax, %r10d
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: popcntq %r9, %rax
+; CHECK-NEXT: popcntq %r8, %r8
+; CHECK-NEXT: addl %eax, %r8d
+; CHECK-NEXT: addl %r10d, %r8d
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: popcntq %rcx, %rax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: popcntq %rdx, %rcx
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: popcntq %rsi, %rdx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: popcntq %rdi, %rax
+; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: addl %r8d, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+ %cnt = call i512 @llvm.ctpop.i512(i512 %a0)
+ %res = trunc i512 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_ctpop_i512(ptr %p0) nounwind {
+; SSE-LABEL: load_ctpop_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: popcntq 56(%rdi), %rax
+; SSE-NEXT: popcntq 48(%rdi), %rcx
+; SSE-NEXT: popcntq 40(%rdi), %rdx
+; SSE-NEXT: popcntq 32(%rdi), %rsi
+; SSE-NEXT: addl %eax, %ecx
+; SSE-NEXT: addl %edx, %esi
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq 24(%rdi), %rax
+; SSE-NEXT: addl %ecx, %esi
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: popcntq 16(%rdi), %rcx
+; SSE-NEXT: addl %eax, %ecx
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: popcntq 8(%rdi), %rdx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq (%rdi), %rax
+; SSE-NEXT: addl %edx, %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: addl %esi, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_ctpop_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: popcntq 56(%rdi), %rax
+; AVX2-NEXT: popcntq 48(%rdi), %rcx
+; AVX2-NEXT: addl %eax, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq 40(%rdi), %rax
+; AVX2-NEXT: popcntq 32(%rdi), %rdx
+; AVX2-NEXT: addl %eax, %edx
+; AVX2-NEXT: addl %ecx, %edx
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: popcntq 24(%rdi), %rcx
+; AVX2-NEXT: popcntq 16(%rdi), %rsi
+; AVX2-NEXT: popcntq 8(%rdi), %r8
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq (%rdi), %rax
+; AVX2-NEXT: addl %ecx, %esi
+; AVX2-NEXT: addl %r8d, %eax
+; AVX2-NEXT: addl %esi, %eax
+; AVX2-NEXT: addl %edx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_ctpop_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: popcntq 56(%rdi), %rax
+; AVX512-NEXT: popcntq 48(%rdi), %rcx
+; AVX512-NEXT: addl %eax, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq 40(%rdi), %rax
+; AVX512-NEXT: popcntq 32(%rdi), %rdx
+; AVX512-NEXT: addl %eax, %edx
+; AVX512-NEXT: addl %ecx, %edx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq 24(%rdi), %rax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: popcntq 16(%rdi), %rcx
+; AVX512-NEXT: popcntq 8(%rdi), %rsi
+; AVX512-NEXT: addl %eax, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq (%rdi), %rax
+; AVX512-NEXT: addl %esi, %eax
+; AVX512-NEXT: addl %ecx, %eax
+; AVX512-NEXT: addl %edx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %a0 = load i512, ptr %p0
+ %cnt = call i512 @llvm.ctpop.i512(i512 %a0)
+ %res = trunc i512 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @test_ctpop_i1024(i1024 %a0) nounwind {
+; SSE-LABEL: test_ctpop_i1024:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11
+; SSE-NEXT: addl %eax, %r10d
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: addl %r11d, %eax
+; SSE-NEXT: xorl %r11d, %r11d
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11
+; SSE-NEXT: xorl %ebx, %ebx
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx
+; SSE-NEXT: addl %r10d, %eax
+; SSE-NEXT: addl %r11d, %ebx
+; SSE-NEXT: xorl %r11d, %r11d
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11
+; SSE-NEXT: xorl %r10d, %r10d
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: addl %r11d, %r10d
+; SSE-NEXT: addl %ebx, %r10d
+; SSE-NEXT: xorl %r11d, %r11d
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %r11
+; SSE-NEXT: xorl %ebx, %ebx
+; SSE-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx
+; SSE-NEXT: addl %eax, %r10d
+; SSE-NEXT: addl %r11d, %ebx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq %r9, %rax
+; SSE-NEXT: popcntq %r8, %r8
+; SSE-NEXT: addl %eax, %r8d
+; SSE-NEXT: addl %ebx, %r8d
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq %rcx, %rax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: popcntq %rdx, %rcx
+; SSE-NEXT: addl %eax, %ecx
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: popcntq %rsi, %rdx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq %rdi, %rax
+; SSE-NEXT: addl %edx, %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: addl %r8d, %eax
+; SSE-NEXT: addl %r10d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_ctpop_i1024:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r10
+; AVX2-NEXT: addl %eax, %r10d
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r11
+; AVX2-NEXT: addl %eax, %r11d
+; AVX2-NEXT: addl %r10d, %r11d
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: xorl %ebx, %ebx
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx
+; AVX2-NEXT: xorl %r14d, %r14d
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r14
+; AVX2-NEXT: addl %eax, %ebx
+; AVX2-NEXT: xorl %r10d, %r10d
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r10
+; AVX2-NEXT: addl %r14d, %r10d
+; AVX2-NEXT: addl %ebx, %r10d
+; AVX2-NEXT: addl %r11d, %r10d
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: xorl %r11d, %r11d
+; AVX2-NEXT: popcntq {{[0-9]+}}(%rsp), %r11
+; AVX2-NEXT: addl %eax, %r11d
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq %r9, %rax
+; AVX2-NEXT: popcntq %r8, %r8
+; AVX2-NEXT: addl %eax, %r8d
+; AVX2-NEXT: addl %r11d, %r8d
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq %rcx, %rax
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: popcntq %rdx, %rcx
+; AVX2-NEXT: addl %eax, %ecx
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: popcntq %rsi, %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq %rdi, %rax
+; AVX2-NEXT: addl %edx, %eax
+; AVX2-NEXT: addl %ecx, %eax
+; AVX2-NEXT: addl %r8d, %eax
+; AVX2-NEXT: addl %r10d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_ctpop_i1024:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512-NEXT: addl %eax, %r10d
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r11
+; AVX512-NEXT: addl %eax, %r11d
+; AVX512-NEXT: addl %r10d, %r11d
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512-NEXT: xorl %ebx, %ebx
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rbx
+; AVX512-NEXT: xorl %r14d, %r14d
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r14
+; AVX512-NEXT: addl %eax, %ebx
+; AVX512-NEXT: xorl %r10d, %r10d
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512-NEXT: addl %r14d, %r10d
+; AVX512-NEXT: addl %ebx, %r10d
+; AVX512-NEXT: addl %r11d, %r10d
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512-NEXT: xorl %r11d, %r11d
+; AVX512-NEXT: popcntq {{[0-9]+}}(%rsp), %r11
+; AVX512-NEXT: addl %eax, %r11d
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq %r9, %rax
+; AVX512-NEXT: popcntq %r8, %r8
+; AVX512-NEXT: addl %eax, %r8d
+; AVX512-NEXT: addl %r11d, %r8d
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq %rcx, %rax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: popcntq %rdx, %rcx
+; AVX512-NEXT: addl %eax, %ecx
+; AVX512-NEXT: xorl %edx, %edx
+; AVX512-NEXT: popcntq %rsi, %rdx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq %rdi, %rax
+; AVX512-NEXT: addl %edx, %eax
+; AVX512-NEXT: addl %ecx, %eax
+; AVX512-NEXT: addl %r8d, %eax
+; AVX512-NEXT: addl %r10d, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: retq
+ %cnt = call i1024 @llvm.ctpop.i1024(i1024 %a0)
+ %res = trunc i1024 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_ctpop_i1024(ptr %p0) nounwind {
+; SSE-LABEL: load_ctpop_i1024:
+; SSE: # %bb.0:
+; SSE-NEXT: popcntq 120(%rdi), %rax
+; SSE-NEXT: popcntq 112(%rdi), %rcx
+; SSE-NEXT: popcntq 104(%rdi), %rdx
+; SSE-NEXT: popcntq 96(%rdi), %rsi
+; SSE-NEXT: addl %eax, %ecx
+; SSE-NEXT: addl %edx, %esi
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq 88(%rdi), %rax
+; SSE-NEXT: addl %ecx, %esi
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: popcntq 80(%rdi), %rdx
+; SSE-NEXT: addl %eax, %edx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq 72(%rdi), %rax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: popcntq 64(%rdi), %rcx
+; SSE-NEXT: addl %eax, %ecx
+; SSE-NEXT: addl %edx, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq 56(%rdi), %rax
+; SSE-NEXT: addl %esi, %ecx
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: popcntq 48(%rdi), %rdx
+; SSE-NEXT: addl %eax, %edx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq 40(%rdi), %rax
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: popcntq 32(%rdi), %rsi
+; SSE-NEXT: addl %eax, %esi
+; SSE-NEXT: addl %edx, %esi
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq 24(%rdi), %rax
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: popcntq 16(%rdi), %rdx
+; SSE-NEXT: popcntq 8(%rdi), %r8
+; SSE-NEXT: addl %eax, %edx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq (%rdi), %rax
+; SSE-NEXT: addl %r8d, %eax
+; SSE-NEXT: addl %edx, %eax
+; SSE-NEXT: addl %esi, %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_ctpop_i1024:
+; AVX2: # %bb.0:
+; AVX2-NEXT: popcntq 120(%rdi), %rax
+; AVX2-NEXT: popcntq 112(%rdi), %rcx
+; AVX2-NEXT: addl %eax, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq 104(%rdi), %rax
+; AVX2-NEXT: popcntq 96(%rdi), %rdx
+; AVX2-NEXT: addl %eax, %edx
+; AVX2-NEXT: addl %ecx, %edx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq 88(%rdi), %rax
+; AVX2-NEXT: popcntq 80(%rdi), %rsi
+; AVX2-NEXT: popcntq 72(%rdi), %r8
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: popcntq 64(%rdi), %rcx
+; AVX2-NEXT: addl %eax, %esi
+; AVX2-NEXT: addl %r8d, %ecx
+; AVX2-NEXT: addl %esi, %ecx
+; AVX2-NEXT: addl %edx, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq 56(%rdi), %rax
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: popcntq 48(%rdi), %rdx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: popcntq 40(%rdi), %rsi
+; AVX2-NEXT: xorl %r8d, %r8d
+; AVX2-NEXT: popcntq 32(%rdi), %r8
+; AVX2-NEXT: addl %eax, %edx
+; AVX2-NEXT: addl %esi, %r8d
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq 24(%rdi), %rax
+; AVX2-NEXT: addl %edx, %r8d
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: popcntq 16(%rdi), %rdx
+; AVX2-NEXT: addl %eax, %edx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: popcntq 8(%rdi), %rsi
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq (%rdi), %rax
+; AVX2-NEXT: addl %esi, %eax
+; AVX2-NEXT: addl %edx, %eax
+; AVX2-NEXT: addl %r8d, %eax
+; AVX2-NEXT: addl %ecx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_ctpop_i1024:
+; AVX512: # %bb.0:
+; AVX512-NEXT: popcntq 120(%rdi), %rax
+; AVX512-NEXT: popcntq 112(%rdi), %rcx
+; AVX512-NEXT: addl %eax, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq 104(%rdi), %rax
+; AVX512-NEXT: popcntq 96(%rdi), %rdx
+; AVX512-NEXT: addl %eax, %edx
+; AVX512-NEXT: addl %ecx, %edx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq 88(%rdi), %rax
+; AVX512-NEXT: popcntq 80(%rdi), %rsi
+; AVX512-NEXT: popcntq 72(%rdi), %r8
+; AVX512-NEXT: addl %eax, %esi
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: popcntq 64(%rdi), %rcx
+; AVX512-NEXT: addl %r8d, %ecx
+; AVX512-NEXT: addl %esi, %ecx
+; AVX512-NEXT: addl %edx, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq 56(%rdi), %rax
+; AVX512-NEXT: xorl %edx, %edx
+; AVX512-NEXT: popcntq 48(%rdi), %rdx
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: popcntq 40(%rdi), %rsi
+; AVX512-NEXT: addl %eax, %edx
+; AVX512-NEXT: xorl %r8d, %r8d
+; AVX512-NEXT: popcntq 32(%rdi), %r8
+; AVX512-NEXT: addl %esi, %r8d
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq 24(%rdi), %rax
+; AVX512-NEXT: addl %edx, %r8d
+; AVX512-NEXT: xorl %edx, %edx
+; AVX512-NEXT: popcntq 16(%rdi), %rdx
+; AVX512-NEXT: addl %eax, %edx
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: popcntq 8(%rdi), %rsi
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: popcntq (%rdi), %rax
+; AVX512-NEXT: addl %esi, %eax
+; AVX512-NEXT: addl %edx, %eax
+; AVX512-NEXT: addl %r8d, %eax
+; AVX512-NEXT: addl %ecx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %a0 = load i1024, ptr %p0
+ %cnt = call i1024 @llvm.ctpop.i1024(i1024 %a0)
+ %res = trunc i1024 %cnt to i32
+ ret i32 %res
+}
+
+;
+; CTLZ
+;
+
+define i32 @test_ctlz_i128(i128 %a0) nounwind {
+; SSE-LABEL: test_ctlz_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: bsrq %rsi, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq %rdi, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rsi, %rsi
+; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_ctlz_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: lzcntq %rsi, %rcx
+; AVX2-NEXT: lzcntq %rdi, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rsi, %rsi
+; AVX2-NEXT: cmovnel %ecx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_ctlz_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: lzcntq %rsi, %rcx
+; AVX512-NEXT: lzcntq %rdi, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: cmovnel %ecx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0)
+ %res = trunc i128 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_ctlz_i128(ptr %p0) nounwind {
+; SSE-LABEL: load_ctlz_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movq 8(%rdi), %rcx
+; SSE-NEXT: bsrq %rcx, %rdx
+; SSE-NEXT: xorl $63, %edx
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq (%rdi), %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rcx, %rcx
+; SSE-NEXT: cmovnel %edx, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_ctlz_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movq 8(%rdi), %rcx
+; AVX2-NEXT: lzcntq %rcx, %rdx
+; AVX2-NEXT: lzcntq (%rdi), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %edx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_ctlz_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movq 8(%rdi), %rcx
+; AVX512-NEXT: lzcntq %rcx, %rdx
+; AVX512-NEXT: lzcntq (%rdi), %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: cmovnel %edx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %a0 = load i128, ptr %p0
+ %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0)
+ %res = trunc i128 %cnt to i32
+ ret i32 %res
+}
+
+; Count leading zeros of a register-passed i256 (zero input defined: i1 0),
+; result truncated to i32. Expansion chains per-limb bsrq (SSE) or lzcntq
+; (AVX2/AVX512) results with cmov selects; CHECK lines look autogenerated —
+; do not hand-edit them.
+define i32 @test_ctlz_i256(i256 %a0) nounwind {
+; SSE-LABEL: test_ctlz_i256:
+; SSE: # %bb.0:
+; SSE-NEXT: bsrq %rcx, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %rdx, %r8
+; SSE-NEXT: xorl $63, %r8d
+; SSE-NEXT: orl $64, %r8d
+; SSE-NEXT: testq %rcx, %rcx
+; SSE-NEXT: cmovnel %eax, %r8d
+; SSE-NEXT: bsrq %rsi, %r9
+; SSE-NEXT: xorl $63, %r9d
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq %rdi, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rsi, %rsi
+; SSE-NEXT: cmovnel %r9d, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %rcx, %rdx
+; SSE-NEXT: cmovnel %r8d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_ctlz_i256:
+; AVX2: # %bb.0:
+; AVX2-NEXT: lzcntq %rcx, %rax
+; AVX2-NEXT: lzcntq %rdx, %r8
+; AVX2-NEXT: addl $64, %r8d
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %eax, %r8d
+; AVX2-NEXT: lzcntq %rsi, %r9
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %rdi, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rsi, %rsi
+; AVX2-NEXT: cmovnel %r9d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: cmovnel %r8d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_ctlz_i256:
+; AVX512: # %bb.0:
+; AVX512-NEXT: lzcntq %rcx, %rax
+; AVX512-NEXT: lzcntq %rdx, %r8
+; AVX512-NEXT: addl $64, %r8d
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: cmovnel %eax, %r8d
+; AVX512-NEXT: lzcntq %rsi, %r9
+; AVX512-NEXT: lzcntq %rdi, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: cmovnel %r9d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %rcx, %rdx
+; AVX512-NEXT: cmovnel %r8d, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0)
+ %res = trunc i256 %cnt to i32
+ ret i32 %res
+}
+
+; Same as test_ctlz_i256 but the i256 operand is loaded from memory, checking
+; that per-limb loads feed the bsrq/lzcntq chain (lzcnt can fold the lowest
+; limb load directly). CHECK lines look autogenerated — do not hand-edit.
+define i32 @load_ctlz_i256(ptr %p0) nounwind {
+; SSE-LABEL: load_ctlz_i256:
+; SSE: # %bb.0:
+; SSE-NEXT: movq 16(%rdi), %rcx
+; SSE-NEXT: movq 24(%rdi), %rdx
+; SSE-NEXT: bsrq %rdx, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %rcx, %rsi
+; SSE-NEXT: xorl $63, %esi
+; SSE-NEXT: orl $64, %esi
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %eax, %esi
+; SSE-NEXT: movq 8(%rdi), %r8
+; SSE-NEXT: bsrq %r8, %r9
+; SSE-NEXT: xorl $63, %r9d
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq (%rdi), %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %r8, %r8
+; SSE-NEXT: cmovnel %r9d, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %rdx, %rcx
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_ctlz_i256:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movq 16(%rdi), %rcx
+; AVX2-NEXT: movq 24(%rdi), %rdx
+; AVX2-NEXT: lzcntq %rdx, %rax
+; AVX2-NEXT: lzcntq %rcx, %rsi
+; AVX2-NEXT: addl $64, %esi
+; AVX2-NEXT: testq %rdx, %rdx
+; AVX2-NEXT: cmovnel %eax, %esi
+; AVX2-NEXT: movq 8(%rdi), %r8
+; AVX2-NEXT: lzcntq %r8, %r9
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq (%rdi), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %r8, %r8
+; AVX2-NEXT: cmovnel %r9d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %rdx, %rcx
+; AVX2-NEXT: cmovnel %esi, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_ctlz_i256:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movq 8(%rdi), %rcx
+; AVX512-NEXT: movq 16(%rdi), %rdx
+; AVX512-NEXT: movq 24(%rdi), %rsi
+; AVX512-NEXT: lzcntq %rsi, %rax
+; AVX512-NEXT: lzcntq %rdx, %r8
+; AVX512-NEXT: addl $64, %r8d
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: cmovnel %eax, %r8d
+; AVX512-NEXT: lzcntq %rcx, %r9
+; AVX512-NEXT: lzcntq (%rdi), %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: cmovnel %r9d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %rsi, %rdx
+; AVX512-NEXT: cmovnel %r8d, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %a0 = load i256, ptr %p0
+ %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0)
+ %res = trunc i256 %cnt to i32
+ ret i32 %res
+}
+
+; ctlz of an i512 split across the six argument registers plus two stack
+; slots (zero input defined: i1 0), truncated to i32. The expansion nests
+; the 128-bit cmov-select pattern and pays callee-saved pushes/pops for the
+; extra temporaries. CHECK lines look autogenerated — do not hand-edit.
+define i32 @test_ctlz_i512(i512 %a0) nounwind {
+; SSE-LABEL: test_ctlz_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; SSE-NEXT: bsrq %r11, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %r10, %r14
+; SSE-NEXT: xorl $63, %r14d
+; SSE-NEXT: orl $64, %r14d
+; SSE-NEXT: testq %r11, %r11
+; SSE-NEXT: cmovnel %eax, %r14d
+; SSE-NEXT: bsrq %r9, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %r8, %rbx
+; SSE-NEXT: xorl $63, %ebx
+; SSE-NEXT: orl $64, %ebx
+; SSE-NEXT: testq %r9, %r9
+; SSE-NEXT: cmovnel %eax, %ebx
+; SSE-NEXT: subl $-128, %ebx
+; SSE-NEXT: movq %r10, %rax
+; SSE-NEXT: orq %r11, %rax
+; SSE-NEXT: cmovnel %r14d, %ebx
+; SSE-NEXT: bsrq %rcx, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %rdx, %r14
+; SSE-NEXT: xorl $63, %r14d
+; SSE-NEXT: orl $64, %r14d
+; SSE-NEXT: testq %rcx, %rcx
+; SSE-NEXT: cmovnel %eax, %r14d
+; SSE-NEXT: bsrq %rsi, %r15
+; SSE-NEXT: xorl $63, %r15d
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq %rdi, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rsi, %rsi
+; SSE-NEXT: cmovnel %r15d, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %rcx, %rdx
+; SSE-NEXT: cmovnel %r14d, %eax
+; SSE-NEXT: addl $256, %eax # imm = 0x100
+; SSE-NEXT: orq %r11, %r9
+; SSE-NEXT: orq %r10, %r8
+; SSE-NEXT: orq %r9, %r8
+; SSE-NEXT: cmovnel %ebx, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_ctlz_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; AVX2-NEXT: lzcntq %r11, %rax
+; AVX2-NEXT: xorl %r14d, %r14d
+; AVX2-NEXT: lzcntq %r10, %r14
+; AVX2-NEXT: addl $64, %r14d
+; AVX2-NEXT: testq %r11, %r11
+; AVX2-NEXT: cmovnel %eax, %r14d
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %r9, %rax
+; AVX2-NEXT: xorl %ebx, %ebx
+; AVX2-NEXT: lzcntq %r8, %rbx
+; AVX2-NEXT: addl $64, %ebx
+; AVX2-NEXT: testq %r9, %r9
+; AVX2-NEXT: cmovnel %eax, %ebx
+; AVX2-NEXT: subl $-128, %ebx
+; AVX2-NEXT: movq %r10, %rax
+; AVX2-NEXT: orq %r11, %rax
+; AVX2-NEXT: cmovnel %r14d, %ebx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %rcx, %rax
+; AVX2-NEXT: xorl %r14d, %r14d
+; AVX2-NEXT: lzcntq %rdx, %r14
+; AVX2-NEXT: addl $64, %r14d
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %eax, %r14d
+; AVX2-NEXT: xorl %r15d, %r15d
+; AVX2-NEXT: lzcntq %rsi, %r15
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %rdi, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rsi, %rsi
+; AVX2-NEXT: cmovnel %r15d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: cmovnel %r14d, %eax
+; AVX2-NEXT: addl $256, %eax # imm = 0x100
+; AVX2-NEXT: orq %r11, %r9
+; AVX2-NEXT: orq %r10, %r8
+; AVX2-NEXT: orq %r9, %r8
+; AVX2-NEXT: cmovnel %ebx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_ctlz_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; AVX512-NEXT: lzcntq %r11, %rax
+; AVX512-NEXT: lzcntq %r10, %r14
+; AVX512-NEXT: addl $64, %r14d
+; AVX512-NEXT: testq %r11, %r11
+; AVX512-NEXT: cmovnel %eax, %r14d
+; AVX512-NEXT: lzcntq %r9, %rax
+; AVX512-NEXT: lzcntq %r8, %rbx
+; AVX512-NEXT: addl $64, %ebx
+; AVX512-NEXT: testq %r9, %r9
+; AVX512-NEXT: cmovnel %eax, %ebx
+; AVX512-NEXT: subl $-128, %ebx
+; AVX512-NEXT: movq %r10, %rax
+; AVX512-NEXT: orq %r11, %rax
+; AVX512-NEXT: cmovnel %r14d, %ebx
+; AVX512-NEXT: lzcntq %rcx, %rax
+; AVX512-NEXT: lzcntq %rdx, %r14
+; AVX512-NEXT: addl $64, %r14d
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: cmovnel %eax, %r14d
+; AVX512-NEXT: lzcntq %rsi, %r15
+; AVX512-NEXT: lzcntq %rdi, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: cmovnel %r15d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %rcx, %rdx
+; AVX512-NEXT: cmovnel %r14d, %eax
+; AVX512-NEXT: addl $256, %eax # imm = 0x100
+; AVX512-NEXT: orq %r11, %r9
+; AVX512-NEXT: orq %r10, %r8
+; AVX512-NEXT: orq %r9, %r8
+; AVX512-NEXT: cmovnel %ebx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: retq
+ %cnt = call i512 @llvm.ctlz.i512(i512 %a0, i1 0)
+ %res = trunc i512 %cnt to i32
+ ret i32 %res
+}
+
+; ctlz of an i512 loaded from memory (eight 64-bit limbs at 0..56(%rdi)),
+; zero input defined, result truncated to i32. Exercises the same nested
+; cmov-select expansion with limb loads instead of argument registers.
+; CHECK lines look autogenerated — do not hand-edit.
+define i32 @load_ctlz_i512(ptr %p0) nounwind {
+; SSE-LABEL: load_ctlz_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq 8(%rdi), %r10
+; SSE-NEXT: movq 16(%rdi), %r9
+; SSE-NEXT: movq 32(%rdi), %rcx
+; SSE-NEXT: movq 40(%rdi), %rdx
+; SSE-NEXT: movq 48(%rdi), %rsi
+; SSE-NEXT: movq 56(%rdi), %r8
+; SSE-NEXT: bsrq %r8, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %rsi, %r14
+; SSE-NEXT: xorl $63, %r14d
+; SSE-NEXT: orl $64, %r14d
+; SSE-NEXT: testq %r8, %r8
+; SSE-NEXT: cmovnel %eax, %r14d
+; SSE-NEXT: bsrq %rdx, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %rcx, %r11
+; SSE-NEXT: xorl $63, %r11d
+; SSE-NEXT: orl $64, %r11d
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %eax, %r11d
+; SSE-NEXT: movq 24(%rdi), %rbx
+; SSE-NEXT: subl $-128, %r11d
+; SSE-NEXT: movq %rsi, %rax
+; SSE-NEXT: orq %r8, %rax
+; SSE-NEXT: cmovnel %r14d, %r11d
+; SSE-NEXT: bsrq %rbx, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %r9, %r14
+; SSE-NEXT: xorl $63, %r14d
+; SSE-NEXT: orl $64, %r14d
+; SSE-NEXT: testq %rbx, %rbx
+; SSE-NEXT: cmovnel %eax, %r14d
+; SSE-NEXT: bsrq %r10, %r15
+; SSE-NEXT: xorl $63, %r15d
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq (%rdi), %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %r10, %r10
+; SSE-NEXT: cmovnel %r15d, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %rbx, %r9
+; SSE-NEXT: cmovnel %r14d, %eax
+; SSE-NEXT: addl $256, %eax # imm = 0x100
+; SSE-NEXT: orq %r8, %rdx
+; SSE-NEXT: orq %rsi, %rcx
+; SSE-NEXT: orq %rdx, %rcx
+; SSE-NEXT: cmovnel %r11d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_ctlz_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: movq 8(%rdi), %r10
+; AVX2-NEXT: movq 16(%rdi), %r9
+; AVX2-NEXT: movq 32(%rdi), %rcx
+; AVX2-NEXT: movq 40(%rdi), %rdx
+; AVX2-NEXT: movq 48(%rdi), %rsi
+; AVX2-NEXT: movq 56(%rdi), %r8
+; AVX2-NEXT: lzcntq %r8, %rax
+; AVX2-NEXT: xorl %ebx, %ebx
+; AVX2-NEXT: lzcntq %rsi, %rbx
+; AVX2-NEXT: addl $64, %ebx
+; AVX2-NEXT: testq %r8, %r8
+; AVX2-NEXT: cmovnel %eax, %ebx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %rdx, %rax
+; AVX2-NEXT: lzcntq %rcx, %r11
+; AVX2-NEXT: addl $64, %r11d
+; AVX2-NEXT: testq %rdx, %rdx
+; AVX2-NEXT: cmovnel %eax, %r11d
+; AVX2-NEXT: subl $-128, %r11d
+; AVX2-NEXT: movq %rsi, %rax
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: cmovnel %ebx, %r11d
+; AVX2-NEXT: movq 24(%rdi), %rbx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %rbx, %rax
+; AVX2-NEXT: xorl %r14d, %r14d
+; AVX2-NEXT: lzcntq %r9, %r14
+; AVX2-NEXT: addl $64, %r14d
+; AVX2-NEXT: testq %rbx, %rbx
+; AVX2-NEXT: cmovnel %eax, %r14d
+; AVX2-NEXT: xorl %r15d, %r15d
+; AVX2-NEXT: lzcntq %r10, %r15
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq (%rdi), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %r10, %r10
+; AVX2-NEXT: cmovnel %r15d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %rbx, %r9
+; AVX2-NEXT: cmovnel %r14d, %eax
+; AVX2-NEXT: addl $256, %eax # imm = 0x100
+; AVX2-NEXT: orq %r8, %rdx
+; AVX2-NEXT: orq %rsi, %rcx
+; AVX2-NEXT: orq %rdx, %rcx
+; AVX2-NEXT: cmovnel %r11d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_ctlz_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: movq 8(%rdi), %r11
+; AVX512-NEXT: movq 16(%rdi), %r9
+; AVX512-NEXT: movq 24(%rdi), %r10
+; AVX512-NEXT: movq 32(%rdi), %rcx
+; AVX512-NEXT: movq 40(%rdi), %rdx
+; AVX512-NEXT: movq 48(%rdi), %rsi
+; AVX512-NEXT: movq 56(%rdi), %r8
+; AVX512-NEXT: lzcntq %r8, %rax
+; AVX512-NEXT: lzcntq %rsi, %r14
+; AVX512-NEXT: addl $64, %r14d
+; AVX512-NEXT: testq %r8, %r8
+; AVX512-NEXT: cmovnel %eax, %r14d
+; AVX512-NEXT: lzcntq %rdx, %rax
+; AVX512-NEXT: lzcntq %rcx, %rbx
+; AVX512-NEXT: addl $64, %ebx
+; AVX512-NEXT: testq %rdx, %rdx
+; AVX512-NEXT: cmovnel %eax, %ebx
+; AVX512-NEXT: subl $-128, %ebx
+; AVX512-NEXT: movq %rsi, %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: cmovnel %r14d, %ebx
+; AVX512-NEXT: lzcntq %r10, %rax
+; AVX512-NEXT: lzcntq %r9, %r14
+; AVX512-NEXT: addl $64, %r14d
+; AVX512-NEXT: testq %r10, %r10
+; AVX512-NEXT: cmovnel %eax, %r14d
+; AVX512-NEXT: lzcntq (%rdi), %rax
+; AVX512-NEXT: lzcntq %r11, %rdi
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %r11, %r11
+; AVX512-NEXT: cmovnel %edi, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %r10, %r9
+; AVX512-NEXT: cmovnel %r14d, %eax
+; AVX512-NEXT: addl $256, %eax # imm = 0x100
+; AVX512-NEXT: orq %r8, %rdx
+; AVX512-NEXT: orq %rsi, %rcx
+; AVX512-NEXT: orq %rdx, %rcx
+; AVX512-NEXT: cmovnel %ebx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: retq
+ %a0 = load i512, ptr %p0
+ %cnt = call i512 @llvm.ctlz.i512(i512 %a0, i1 0)
+ %res = trunc i512 %cnt to i32
+ ret i32 %res
+}
+
+; ctlz of an i1024 (sixteen 64-bit limbs: six in registers, rest on the
+; stack), zero input defined, truncated to i32. The expansion needs spills
+; and reloads of argument registers on top of the nested cmov-select chains.
+; CHECK lines look autogenerated — do not hand-edit.
+define i32 @test_ctlz_i1024(i1024 %a0) nounwind {
+; SSE-LABEL: test_ctlz_i1024:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq %r9, %r11
+; SSE-NEXT: movq %r8, %r9
+; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq %rdx, %r12
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; SSE-NEXT: bsrq %r8, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %r15, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: orl $64, %ecx
+; SSE-NEXT: testq %r8, %r8
+; SSE-NEXT: cmovnel %eax, %ecx
+; SSE-NEXT: bsrq %r14, %rdx
+; SSE-NEXT: xorl $63, %edx
+; SSE-NEXT: bsrq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: orl $64, %eax
+; SSE-NEXT: testq %r14, %r14
+; SSE-NEXT: cmovnel %edx, %eax
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: movq %r15, %rdx
+; SSE-NEXT: orq %r8, %rdx
+; SSE-NEXT: movq %r8, %r14
+; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: bsrq %r13, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: bsrq %rbx, %rdx
+; SSE-NEXT: xorl $63, %edx
+; SSE-NEXT: orl $64, %edx
+; SSE-NEXT: testq %r13, %r13
+; SSE-NEXT: cmovnel %ecx, %edx
+; SSE-NEXT: bsrq %r10, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; SSE-NEXT: bsrq %r8, %rbp
+; SSE-NEXT: xorl $63, %ebp
+; SSE-NEXT: orl $64, %ebp
+; SSE-NEXT: testq %r10, %r10
+; SSE-NEXT: cmovnel %ecx, %ebp
+; SSE-NEXT: subl $-128, %ebp
+; SSE-NEXT: movq %rbx, %rcx
+; SSE-NEXT: orq %r13, %rcx
+; SSE-NEXT: cmovnel %edx, %ebp
+; SSE-NEXT: addl $256, %ebp # imm = 0x100
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; SSE-NEXT: orq %r14, %rcx
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: orq %r15, %rdx
+; SSE-NEXT: orq %rcx, %rdx
+; SSE-NEXT: cmovnel %eax, %ebp
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
+; SSE-NEXT: bsrq %r14, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; SSE-NEXT: bsrq %r15, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: orl $64, %ecx
+; SSE-NEXT: testq %r14, %r14
+; SSE-NEXT: cmovnel %eax, %ecx
+; SSE-NEXT: bsrq %r11, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %r9, %rdx
+; SSE-NEXT: xorl $63, %edx
+; SSE-NEXT: orl $64, %edx
+; SSE-NEXT: testq %r11, %r11
+; SSE-NEXT: cmovnel %eax, %edx
+; SSE-NEXT: subl $-128, %edx
+; SSE-NEXT: movq %r15, %rax
+; SSE-NEXT: orq %r14, %rax
+; SSE-NEXT: cmovnel %ecx, %edx
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: bsrq %r15, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %r12, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: orl $64, %ecx
+; SSE-NEXT: testq %r15, %r15
+; SSE-NEXT: cmovnel %eax, %ecx
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq %rdi, %rax
+; SSE-NEXT: bsrq %rsi, %rdi
+; SSE-NEXT: xorl $63, %edi
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rsi, %rsi
+; SSE-NEXT: cmovnel %edi, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %r15, %r12
+; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: orq %r14, %r11
+; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r9
+; SSE-NEXT: addl $256, %eax # imm = 0x100
+; SSE-NEXT: orq %r11, %r9
+; SSE-NEXT: cmovnel %edx, %eax
+; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r13
+; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: orq %r13, %r10
+; SSE-NEXT: orq {{[0-9]+}}(%rsp), %rbx
+; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r8
+; SSE-NEXT: orq %rbx, %r8
+; SSE-NEXT: addl $512, %eax # imm = 0x200
+; SSE-NEXT: orq %r10, %r8
+; SSE-NEXT: cmovnel %ebp, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_ctlz_i1024:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: movq %r9, %r14
+; AVX2-NEXT: movq %r8, %r11
+; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: lzcntq %r12, %rcx
+; AVX2-NEXT: xorl %r9d, %r9d
+; AVX2-NEXT: lzcntq %r8, %r9
+; AVX2-NEXT: addl $64, %r9d
+; AVX2-NEXT: testq %r12, %r12
+; AVX2-NEXT: cmovnel %ecx, %r9d
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: lzcntq %r10, %rsi
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: lzcntq %rax, %rcx
+; AVX2-NEXT: addl $64, %ecx
+; AVX2-NEXT: testq %r10, %r10
+; AVX2-NEXT: cmovnel %esi, %ecx
+; AVX2-NEXT: subl $-128, %ecx
+; AVX2-NEXT: movq %r8, %rsi
+; AVX2-NEXT: orq %r12, %rsi
+; AVX2-NEXT: cmovnel %r9d, %ecx
+; AVX2-NEXT: xorl %edi, %edi
+; AVX2-NEXT: lzcntq %rbx, %rdi
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: lzcntq %r15, %rsi
+; AVX2-NEXT: addl $64, %esi
+; AVX2-NEXT: testq %rbx, %rbx
+; AVX2-NEXT: cmovnel %edi, %esi
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; AVX2-NEXT: xorl %ebp, %ebp
+; AVX2-NEXT: lzcntq %r13, %rbp
+; AVX2-NEXT: addl $64, %ebp
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r9
+; AVX2-NEXT: xorl %edi, %edi
+; AVX2-NEXT: lzcntq %r9, %rdi
+; AVX2-NEXT: testq %r9, %r9
+; AVX2-NEXT: cmovnel %edi, %ebp
+; AVX2-NEXT: subl $-128, %ebp
+; AVX2-NEXT: movq %r15, %rdi
+; AVX2-NEXT: orq %rbx, %rdi
+; AVX2-NEXT: cmovnel %esi, %ebp
+; AVX2-NEXT: addl $256, %ebp # imm = 0x100
+; AVX2-NEXT: movq %r10, %rdi
+; AVX2-NEXT: orq %r12, %rdi
+; AVX2-NEXT: movq %rax, %rsi
+; AVX2-NEXT: orq %r8, %rsi
+; AVX2-NEXT: orq %rdi, %rsi
+; AVX2-NEXT: cmovnel %ecx, %ebp
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %rdi, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: lzcntq %r12, %rcx
+; AVX2-NEXT: testq %r12, %r12
+; AVX2-NEXT: cmovnel %ecx, %eax
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: lzcntq %r11, %rcx
+; AVX2-NEXT: addl $64, %ecx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: lzcntq %r14, %rsi
+; AVX2-NEXT: testq %r14, %r14
+; AVX2-NEXT: cmovnel %esi, %ecx
+; AVX2-NEXT: subl $-128, %ecx
+; AVX2-NEXT: movq %rdi, %rsi
+; AVX2-NEXT: orq %r12, %rsi
+; AVX2-NEXT: cmovnel %eax, %ecx
+; AVX2-NEXT: movq %rdx, %rdi
+; AVX2-NEXT: lzcntq %rdx, %rdx
+; AVX2-NEXT: addl $64, %edx
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %r10, %rax
+; AVX2-NEXT: testq %r10, %r10
+; AVX2-NEXT: cmovnel %eax, %edx
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: lzcntq %rax, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; AVX2-NEXT: lzcntq %rsi, %r8
+; AVX2-NEXT: testq %rsi, %rsi
+; AVX2-NEXT: cmovnel %r8d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %r10, %rdi
+; AVX2-NEXT: cmovnel %edx, %eax
+; AVX2-NEXT: orq %r12, %r14
+; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r11
+; AVX2-NEXT: addl $256, %eax # imm = 0x100
+; AVX2-NEXT: orq %r14, %r11
+; AVX2-NEXT: cmovnel %ecx, %eax
+; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %rbx
+; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r9
+; AVX2-NEXT: orq %rbx, %r9
+; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r15
+; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r13
+; AVX2-NEXT: orq %r15, %r13
+; AVX2-NEXT: addl $512, %eax # imm = 0x200
+; AVX2-NEXT: orq %r9, %r13
+; AVX2-NEXT: cmovnel %ebp, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_ctlz_i1024:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: movq %r9, %r14
+; AVX512-NEXT: movq %r8, %r11
+; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
+; AVX512-NEXT: lzcntq %r12, %rcx
+; AVX512-NEXT: lzcntq %r8, %r9
+; AVX512-NEXT: addl $64, %r9d
+; AVX512-NEXT: testq %r12, %r12
+; AVX512-NEXT: cmovnel %ecx, %r9d
+; AVX512-NEXT: lzcntq %r10, %rsi
+; AVX512-NEXT: lzcntq %rax, %rcx
+; AVX512-NEXT: addl $64, %ecx
+; AVX512-NEXT: testq %r10, %r10
+; AVX512-NEXT: cmovnel %esi, %ecx
+; AVX512-NEXT: subl $-128, %ecx
+; AVX512-NEXT: movq %r8, %rsi
+; AVX512-NEXT: orq %r12, %rsi
+; AVX512-NEXT: cmovnel %r9d, %ecx
+; AVX512-NEXT: lzcntq %rbx, %rdi
+; AVX512-NEXT: lzcntq %r15, %rsi
+; AVX512-NEXT: addl $64, %esi
+; AVX512-NEXT: testq %rbx, %rbx
+; AVX512-NEXT: cmovnel %edi, %esi
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; AVX512-NEXT: lzcntq %r13, %rbp
+; AVX512-NEXT: addl $64, %ebp
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r9
+; AVX512-NEXT: lzcntq %r9, %rdi
+; AVX512-NEXT: testq %r9, %r9
+; AVX512-NEXT: cmovnel %edi, %ebp
+; AVX512-NEXT: subl $-128, %ebp
+; AVX512-NEXT: movq %r15, %rdi
+; AVX512-NEXT: orq %rbx, %rdi
+; AVX512-NEXT: cmovnel %esi, %ebp
+; AVX512-NEXT: addl $256, %ebp # imm = 0x100
+; AVX512-NEXT: movq %r10, %rdi
+; AVX512-NEXT: orq %r12, %rdi
+; AVX512-NEXT: movq %rax, %rsi
+; AVX512-NEXT: orq %r8, %rsi
+; AVX512-NEXT: orq %rdi, %rsi
+; AVX512-NEXT: cmovnel %ecx, %ebp
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; AVX512-NEXT: lzcntq %rdi, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
+; AVX512-NEXT: lzcntq %r12, %rcx
+; AVX512-NEXT: testq %r12, %r12
+; AVX512-NEXT: cmovnel %ecx, %eax
+; AVX512-NEXT: lzcntq %r11, %rcx
+; AVX512-NEXT: addl $64, %ecx
+; AVX512-NEXT: lzcntq %r14, %rsi
+; AVX512-NEXT: testq %r14, %r14
+; AVX512-NEXT: cmovnel %esi, %ecx
+; AVX512-NEXT: subl $-128, %ecx
+; AVX512-NEXT: movq %rdi, %rsi
+; AVX512-NEXT: orq %r12, %rsi
+; AVX512-NEXT: cmovnel %eax, %ecx
+; AVX512-NEXT: movq %rdx, %rdi
+; AVX512-NEXT: lzcntq %rdx, %rdx
+; AVX512-NEXT: addl $64, %edx
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX512-NEXT: lzcntq %r10, %rax
+; AVX512-NEXT: testq %r10, %r10
+; AVX512-NEXT: cmovnel %eax, %edx
+; AVX512-NEXT: lzcntq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; AVX512-NEXT: lzcntq %rsi, %r8
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: cmovnel %r8d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %r10, %rdi
+; AVX512-NEXT: cmovnel %edx, %eax
+; AVX512-NEXT: orq %r12, %r14
+; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r11
+; AVX512-NEXT: addl $256, %eax # imm = 0x100
+; AVX512-NEXT: orq %r14, %r11
+; AVX512-NEXT: cmovnel %ecx, %eax
+; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %rbx
+; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r9
+; AVX512-NEXT: orq %rbx, %r9
+; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r15
+; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r13
+; AVX512-NEXT: orq %r15, %r13
+; AVX512-NEXT: addl $512, %eax # imm = 0x200
+; AVX512-NEXT: orq %r9, %r13
+; AVX512-NEXT: cmovnel %ebp, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: retq
+ %cnt = call i1024 @llvm.ctlz.i1024(i1024 %a0, i1 0)
+ %res = trunc i1024 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_ctlz_i1024(ptr %p0) nounwind {
+; SSE-LABEL: load_ctlz_i1024:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq 40(%rdi), %rbp
+; SSE-NEXT: movq 64(%rdi), %rbx
+; SSE-NEXT: movq 72(%rdi), %r11
+; SSE-NEXT: movq 80(%rdi), %r12
+; SSE-NEXT: movq 88(%rdi), %r14
+; SSE-NEXT: movq 96(%rdi), %rsi
+; SSE-NEXT: movq 104(%rdi), %r9
+; SSE-NEXT: movq 112(%rdi), %r10
+; SSE-NEXT: movq 120(%rdi), %r8
+; SSE-NEXT: bsrq %r8, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %r10, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: orl $64, %ecx
+; SSE-NEXT: testq %r8, %r8
+; SSE-NEXT: cmovnel %eax, %ecx
+; SSE-NEXT: bsrq %r9, %rdx
+; SSE-NEXT: xorl $63, %edx
+; SSE-NEXT: bsrq %rsi, %rax
+; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: orl $64, %eax
+; SSE-NEXT: testq %r9, %r9
+; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: cmovnel %edx, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: movq %r10, %rdx
+; SSE-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: orq %r8, %rdx
+; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: bsrq %r14, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: bsrq %r12, %rdx
+; SSE-NEXT: xorl $63, %edx
+; SSE-NEXT: orl $64, %edx
+; SSE-NEXT: testq %r14, %r14
+; SSE-NEXT: cmovnel %ecx, %edx
+; SSE-NEXT: bsrq %r11, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: bsrq %rbx, %r15
+; SSE-NEXT: xorl $63, %r15d
+; SSE-NEXT: orl $64, %r15d
+; SSE-NEXT: testq %r11, %r11
+; SSE-NEXT: cmovnel %ecx, %r15d
+; SSE-NEXT: subl $-128, %r15d
+; SSE-NEXT: movq %r12, %rcx
+; SSE-NEXT: orq %r14, %rcx
+; SSE-NEXT: cmovnel %edx, %r15d
+; SSE-NEXT: movq 48(%rdi), %r12
+; SSE-NEXT: addl $256, %r15d # imm = 0x100
+; SSE-NEXT: movq %r9, %rcx
+; SSE-NEXT: orq %r8, %rcx
+; SSE-NEXT: movq %rsi, %rdx
+; SSE-NEXT: orq %r10, %rdx
+; SSE-NEXT: orq %rcx, %rdx
+; SSE-NEXT: movq 56(%rdi), %r13
+; SSE-NEXT: cmovnel %eax, %r15d
+; SSE-NEXT: bsrq %r13, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: bsrq %r12, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: orl $64, %ecx
+; SSE-NEXT: testq %r13, %r13
+; SSE-NEXT: cmovnel %eax, %ecx
+; SSE-NEXT: movq %rbp, %r10
+; SSE-NEXT: bsrq %rbp, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: movq 32(%rdi), %r8
+; SSE-NEXT: bsrq %r8, %rbp
+; SSE-NEXT: xorl $63, %ebp
+; SSE-NEXT: orl $64, %ebp
+; SSE-NEXT: testq %r10, %r10
+; SSE-NEXT: cmovnel %eax, %ebp
+; SSE-NEXT: subl $-128, %ebp
+; SSE-NEXT: movq %r12, %rax
+; SSE-NEXT: orq %r13, %rax
+; SSE-NEXT: cmovnel %ecx, %ebp
+; SSE-NEXT: movq 24(%rdi), %r9
+; SSE-NEXT: bsrq %r9, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: movq 16(%rdi), %rsi
+; SSE-NEXT: bsrq %rsi, %rcx
+; SSE-NEXT: xorl $63, %ecx
+; SSE-NEXT: orl $64, %ecx
+; SSE-NEXT: testq %r9, %r9
+; SSE-NEXT: cmovnel %eax, %ecx
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rdi
+; SSE-NEXT: bsrq %rdi, %rdx
+; SSE-NEXT: xorl $63, %edx
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rdi, %rdi
+; SSE-NEXT: cmovnel %edx, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %r9, %rsi
+; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: orq %r13, %r10
+; SSE-NEXT: orq %r12, %r8
+; SSE-NEXT: addl $256, %eax # imm = 0x100
+; SSE-NEXT: orq %r10, %r8
+; SSE-NEXT: cmovnel %ebp, %eax
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; SSE-NEXT: orq %r14, %r11
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; SSE-NEXT: orq %rcx, %rbx
+; SSE-NEXT: addl $512, %eax # imm = 0x200
+; SSE-NEXT: orq %r11, %rbx
+; SSE-NEXT: cmovnel %r15d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_ctlz_i1024:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: movq 48(%rdi), %r9
+; AVX2-NEXT: movq 56(%rdi), %rbp
+; AVX2-NEXT: movq 64(%rdi), %r11
+; AVX2-NEXT: movq 72(%rdi), %r10
+; AVX2-NEXT: movq 80(%rdi), %r14
+; AVX2-NEXT: movq 88(%rdi), %rbx
+; AVX2-NEXT: movq 96(%rdi), %rdx
+; AVX2-NEXT: movq 104(%rdi), %r8
+; AVX2-NEXT: movq 112(%rdi), %rsi
+; AVX2-NEXT: movq 120(%rdi), %r15
+; AVX2-NEXT: lzcntq %r15, %rax
+; AVX2-NEXT: lzcntq %rsi, %rcx
+; AVX2-NEXT: addl $64, %ecx
+; AVX2-NEXT: testq %r15, %r15
+; AVX2-NEXT: cmovnel %eax, %ecx
+; AVX2-NEXT: xorl %r12d, %r12d
+; AVX2-NEXT: lzcntq %r8, %r12
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %rdx, %rax
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %r8, %r8
+; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: cmovnel %r12d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: movq %rsi, %r12
+; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: orq %r15, %r12
+; AVX2-NEXT: cmovnel %ecx, %eax
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: lzcntq %rbx, %rcx
+; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: xorl %r13d, %r13d
+; AVX2-NEXT: lzcntq %r14, %r13
+; AVX2-NEXT: addl $64, %r13d
+; AVX2-NEXT: testq %rbx, %rbx
+; AVX2-NEXT: cmovnel %ecx, %r13d
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: lzcntq %r10, %rcx
+; AVX2-NEXT: xorl %r12d, %r12d
+; AVX2-NEXT: lzcntq %r11, %r12
+; AVX2-NEXT: addl $64, %r12d
+; AVX2-NEXT: testq %r10, %r10
+; AVX2-NEXT: cmovnel %ecx, %r12d
+; AVX2-NEXT: subl $-128, %r12d
+; AVX2-NEXT: movq %r14, %rcx
+; AVX2-NEXT: orq %rbx, %rcx
+; AVX2-NEXT: cmovnel %r13d, %r12d
+; AVX2-NEXT: addl $256, %r12d # imm = 0x100
+; AVX2-NEXT: movq %r8, %rcx
+; AVX2-NEXT: orq %r15, %rcx
+; AVX2-NEXT: orq %rsi, %rdx
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: cmovnel %eax, %r12d
+; AVX2-NEXT: movq %rbp, %r14
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: lzcntq %rbp, %rcx
+; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %r9, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rbp, %rbp
+; AVX2-NEXT: cmovnel %ecx, %eax
+; AVX2-NEXT: movq 32(%rdi), %r13
+; AVX2-NEXT: xorl %ebp, %ebp
+; AVX2-NEXT: lzcntq %r13, %rbp
+; AVX2-NEXT: addl $64, %ebp
+; AVX2-NEXT: movq 40(%rdi), %r8
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: lzcntq %r8, %rdx
+; AVX2-NEXT: testq %r8, %r8
+; AVX2-NEXT: cmovnel %edx, %ebp
+; AVX2-NEXT: subl $-128, %ebp
+; AVX2-NEXT: movq %r9, %rdx
+; AVX2-NEXT: orq %r14, %rdx
+; AVX2-NEXT: cmovnel %eax, %ebp
+; AVX2-NEXT: movq 16(%rdi), %r9
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: lzcntq %r9, %rcx
+; AVX2-NEXT: addl $64, %ecx
+; AVX2-NEXT: movq 24(%rdi), %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq %rdx, %rax
+; AVX2-NEXT: testq %rdx, %rdx
+; AVX2-NEXT: cmovnel %eax, %ecx
+; AVX2-NEXT: movq 8(%rdi), %rsi
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: lzcntq (%rdi), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: lzcntq %rsi, %rdi
+; AVX2-NEXT: testq %rsi, %rsi
+; AVX2-NEXT: cmovnel %edi, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %rdx, %r9
+; AVX2-NEXT: cmovnel %ecx, %eax
+; AVX2-NEXT: orq %r14, %r8
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; AVX2-NEXT: addl $256, %eax # imm = 0x100
+; AVX2-NEXT: orq %r8, %r13
+; AVX2-NEXT: cmovnel %ebp, %eax
+; AVX2-NEXT: orq %r15, %rbx
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; AVX2-NEXT: orq %rbx, %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; AVX2-NEXT: orq %rcx, %r11
+; AVX2-NEXT: addl $512, %eax # imm = 0x200
+; AVX2-NEXT: orq %r10, %r11
+; AVX2-NEXT: cmovnel %r12d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_ctlz_i1024:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: movq 32(%rdi), %r14
+; AVX512-NEXT: movq 48(%rdi), %rbp
+; AVX512-NEXT: movq 64(%rdi), %r11
+; AVX512-NEXT: movq 72(%rdi), %r10
+; AVX512-NEXT: movq 80(%rdi), %rdx
+; AVX512-NEXT: movq 88(%rdi), %rbx
+; AVX512-NEXT: movq 96(%rdi), %rsi
+; AVX512-NEXT: movq 104(%rdi), %r9
+; AVX512-NEXT: movq 112(%rdi), %r8
+; AVX512-NEXT: movq 120(%rdi), %r15
+; AVX512-NEXT: lzcntq %r15, %rax
+; AVX512-NEXT: lzcntq %r8, %rcx
+; AVX512-NEXT: addl $64, %ecx
+; AVX512-NEXT: testq %r15, %r15
+; AVX512-NEXT: cmovnel %eax, %ecx
+; AVX512-NEXT: lzcntq %r9, %r12
+; AVX512-NEXT: lzcntq %rsi, %rax
+; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %r9, %r9
+; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: cmovnel %r12d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: movq %r8, %r12
+; AVX512-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: orq %r15, %r12
+; AVX512-NEXT: cmovnel %ecx, %eax
+; AVX512-NEXT: lzcntq %rbx, %rcx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: lzcntq %rdx, %r13
+; AVX512-NEXT: addl $64, %r13d
+; AVX512-NEXT: testq %rbx, %rbx
+; AVX512-NEXT: cmovnel %ecx, %r13d
+; AVX512-NEXT: lzcntq %r10, %rcx
+; AVX512-NEXT: lzcntq %r11, %r12
+; AVX512-NEXT: addl $64, %r12d
+; AVX512-NEXT: testq %r10, %r10
+; AVX512-NEXT: cmovnel %ecx, %r12d
+; AVX512-NEXT: subl $-128, %r12d
+; AVX512-NEXT: movq %rdx, %rcx
+; AVX512-NEXT: orq %rbx, %rcx
+; AVX512-NEXT: cmovnel %r13d, %r12d
+; AVX512-NEXT: addl $256, %r12d # imm = 0x100
+; AVX512-NEXT: movq %r9, %rcx
+; AVX512-NEXT: orq %r15, %rcx
+; AVX512-NEXT: orq %r8, %rsi
+; AVX512-NEXT: orq %rcx, %rsi
+; AVX512-NEXT: movq 56(%rdi), %r13
+; AVX512-NEXT: cmovnel %eax, %r12d
+; AVX512-NEXT: lzcntq %r13, %rcx
+; AVX512-NEXT: movq %rbp, %rsi
+; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: lzcntq %rbp, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %r13, %r13
+; AVX512-NEXT: cmovnel %ecx, %eax
+; AVX512-NEXT: lzcntq %r14, %rbp
+; AVX512-NEXT: addl $64, %ebp
+; AVX512-NEXT: movq 40(%rdi), %r8
+; AVX512-NEXT: lzcntq %r8, %rdx
+; AVX512-NEXT: testq %r8, %r8
+; AVX512-NEXT: cmovnel %edx, %ebp
+; AVX512-NEXT: subl $-128, %ebp
+; AVX512-NEXT: movq %rsi, %rdx
+; AVX512-NEXT: orq %r13, %rdx
+; AVX512-NEXT: cmovnel %eax, %ebp
+; AVX512-NEXT: movq 16(%rdi), %r9
+; AVX512-NEXT: lzcntq %r9, %rcx
+; AVX512-NEXT: addl $64, %ecx
+; AVX512-NEXT: movq 24(%rdi), %rdx
+; AVX512-NEXT: lzcntq %rdx, %rax
+; AVX512-NEXT: testq %rdx, %rdx
+; AVX512-NEXT: cmovnel %eax, %ecx
+; AVX512-NEXT: movq 8(%rdi), %rsi
+; AVX512-NEXT: lzcntq (%rdi), %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: lzcntq %rsi, %rdi
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: cmovnel %edi, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %rdx, %r9
+; AVX512-NEXT: cmovnel %ecx, %eax
+; AVX512-NEXT: orq %r13, %r8
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; AVX512-NEXT: addl $256, %eax # imm = 0x100
+; AVX512-NEXT: orq %r8, %r14
+; AVX512-NEXT: cmovnel %ebp, %eax
+; AVX512-NEXT: orq %r15, %rbx
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; AVX512-NEXT: orq %rbx, %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; AVX512-NEXT: orq %rcx, %r11
+; AVX512-NEXT: addl $512, %eax # imm = 0x200
+; AVX512-NEXT: orq %r10, %r11
+; AVX512-NEXT: cmovnel %r12d, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: retq
+ %a0 = load i1024, ptr %p0
+ %cnt = call i1024 @llvm.ctlz.i1024(i1024 %a0, i1 0)
+ %res = trunc i1024 %cnt to i32
+ ret i32 %res
+}
+
+;
+; CTTZ
+;
+
+define i32 @test_cttz_i128(i128 %a0) nounwind {
+; SSE-LABEL: test_cttz_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: rep bsfq %rdi, %rcx
+; SSE-NEXT: movl $64, %eax
+; SSE-NEXT: rep bsfq %rsi, %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rdi, %rdi
+; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_cttz_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: tzcntq %rdi, %rcx
+; AVX2-NEXT: tzcntq %rsi, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rdi, %rdi
+; AVX2-NEXT: cmovnel %ecx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_cttz_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: tzcntq %rdi, %rcx
+; AVX512-NEXT: tzcntq %rsi, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rdi, %rdi
+; AVX512-NEXT: cmovnel %ecx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %cnt = call i128 @llvm.cttz.i128(i128 %a0, i1 0)
+ %res = trunc i128 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_cttz_i128(ptr %p0) nounwind {
+; SSE-LABEL: load_cttz_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movq (%rdi), %rcx
+; SSE-NEXT: rep bsfq %rcx, %rdx
+; SSE-NEXT: movl $64, %eax
+; SSE-NEXT: rep bsfq 8(%rdi), %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rcx, %rcx
+; SSE-NEXT: cmovnel %edx, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_cttz_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movq (%rdi), %rcx
+; AVX2-NEXT: tzcntq %rcx, %rdx
+; AVX2-NEXT: tzcntq 8(%rdi), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %edx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_cttz_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movq (%rdi), %rcx
+; AVX512-NEXT: tzcntq %rcx, %rdx
+; AVX512-NEXT: tzcntq 8(%rdi), %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: cmovnel %edx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %a0 = load i128, ptr %p0
+ %cnt = call i128 @llvm.cttz.i128(i128 %a0, i1 0)
+ %res = trunc i128 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @test_cttz_i256(i256 %a0) nounwind {
+; SSE-LABEL: test_cttz_i256:
+; SSE: # %bb.0:
+; SSE-NEXT: rep bsfq %rdi, %rax
+; SSE-NEXT: rep bsfq %rsi, %r8
+; SSE-NEXT: addl $64, %r8d
+; SSE-NEXT: testq %rdi, %rdi
+; SSE-NEXT: cmovnel %eax, %r8d
+; SSE-NEXT: rep bsfq %rdx, %r9
+; SSE-NEXT: movl $64, %eax
+; SSE-NEXT: rep bsfq %rcx, %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %r9d, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %rsi, %rdi
+; SSE-NEXT: cmovnel %r8d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_cttz_i256:
+; AVX2: # %bb.0:
+; AVX2-NEXT: tzcntq %rdi, %rax
+; AVX2-NEXT: tzcntq %rsi, %r8
+; AVX2-NEXT: addl $64, %r8d
+; AVX2-NEXT: testq %rdi, %rdi
+; AVX2-NEXT: cmovnel %eax, %r8d
+; AVX2-NEXT: tzcntq %rdx, %r9
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %rcx, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rdx, %rdx
+; AVX2-NEXT: cmovnel %r9d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %rsi, %rdi
+; AVX2-NEXT: cmovnel %r8d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_cttz_i256:
+; AVX512: # %bb.0:
+; AVX512-NEXT: tzcntq %rdi, %rax
+; AVX512-NEXT: tzcntq %rsi, %r8
+; AVX512-NEXT: addl $64, %r8d
+; AVX512-NEXT: testq %rdi, %rdi
+; AVX512-NEXT: cmovnel %eax, %r8d
+; AVX512-NEXT: tzcntq %rdx, %r9
+; AVX512-NEXT: tzcntq %rcx, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rdx, %rdx
+; AVX512-NEXT: cmovnel %r9d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %rsi, %rdi
+; AVX512-NEXT: cmovnel %r8d, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %cnt = call i256 @llvm.cttz.i256(i256 %a0, i1 0)
+ %res = trunc i256 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_cttz_i256(ptr %p0) nounwind {
+; SSE-LABEL: load_cttz_i256:
+; SSE: # %bb.0:
+; SSE-NEXT: movq 16(%rdi), %rcx
+; SSE-NEXT: movq (%rdi), %rdx
+; SSE-NEXT: movq 8(%rdi), %rsi
+; SSE-NEXT: rep bsfq %rdx, %rax
+; SSE-NEXT: rep bsfq %rsi, %r8
+; SSE-NEXT: addl $64, %r8d
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %eax, %r8d
+; SSE-NEXT: rep bsfq %rcx, %r9
+; SSE-NEXT: movl $64, %eax
+; SSE-NEXT: rep bsfq 24(%rdi), %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rcx, %rcx
+; SSE-NEXT: cmovnel %r9d, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %rsi, %rdx
+; SSE-NEXT: cmovnel %r8d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_cttz_i256:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movq (%rdi), %rcx
+; AVX2-NEXT: movq 8(%rdi), %rdx
+; AVX2-NEXT: tzcntq %rcx, %rax
+; AVX2-NEXT: tzcntq %rdx, %rsi
+; AVX2-NEXT: addl $64, %esi
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %eax, %esi
+; AVX2-NEXT: movq 16(%rdi), %r8
+; AVX2-NEXT: tzcntq %r8, %r9
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq 24(%rdi), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %r8, %r8
+; AVX2-NEXT: cmovnel %r9d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %rdx, %rcx
+; AVX2-NEXT: cmovnel %esi, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_cttz_i256:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movq 16(%rdi), %rcx
+; AVX512-NEXT: movq (%rdi), %rdx
+; AVX512-NEXT: movq 8(%rdi), %rsi
+; AVX512-NEXT: tzcntq %rdx, %rax
+; AVX512-NEXT: tzcntq %rsi, %r8
+; AVX512-NEXT: addl $64, %r8d
+; AVX512-NEXT: testq %rdx, %rdx
+; AVX512-NEXT: cmovnel %eax, %r8d
+; AVX512-NEXT: tzcntq %rcx, %r9
+; AVX512-NEXT: tzcntq 24(%rdi), %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: cmovnel %r9d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %rsi, %rdx
+; AVX512-NEXT: cmovnel %r8d, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %a0 = load i256, ptr %p0
+ %cnt = call i256 @llvm.cttz.i256(i256 %a0, i1 0)
+ %res = trunc i256 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @test_cttz_i512(i512 %a0) nounwind {
+; SSE-LABEL: test_cttz_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: rep bsfq %rdi, %rax
+; SSE-NEXT: rep bsfq %rsi, %r11
+; SSE-NEXT: addl $64, %r11d
+; SSE-NEXT: testq %rdi, %rdi
+; SSE-NEXT: cmovnel %eax, %r11d
+; SSE-NEXT: rep bsfq %rdx, %rax
+; SSE-NEXT: rep bsfq %rcx, %r10
+; SSE-NEXT: addl $64, %r10d
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %eax, %r10d
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; SSE-NEXT: subl $-128, %r10d
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: orq %rsi, %rax
+; SSE-NEXT: cmovnel %r11d, %r10d
+; SSE-NEXT: rep bsfq %r8, %rax
+; SSE-NEXT: rep bsfq %r9, %r11
+; SSE-NEXT: addl $64, %r11d
+; SSE-NEXT: testq %r8, %r8
+; SSE-NEXT: cmovnel %eax, %r11d
+; SSE-NEXT: rep bsfq %rbx, %r14
+; SSE-NEXT: movl $64, %eax
+; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rbx, %rbx
+; SSE-NEXT: cmovnel %r14d, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %r9, %r8
+; SSE-NEXT: cmovnel %r11d, %eax
+; SSE-NEXT: addl $256, %eax # imm = 0x100
+; SSE-NEXT: orq %rcx, %rsi
+; SSE-NEXT: orq %rdx, %rdi
+; SSE-NEXT: orq %rsi, %rdi
+; SSE-NEXT: cmovnel %r10d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r14
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_cttz_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: tzcntq %rdi, %rax
+; AVX2-NEXT: tzcntq %rsi, %r11
+; AVX2-NEXT: addl $64, %r11d
+; AVX2-NEXT: testq %rdi, %rdi
+; AVX2-NEXT: cmovnel %eax, %r11d
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %rdx, %rax
+; AVX2-NEXT: tzcntq %rcx, %r10
+; AVX2-NEXT: addl $64, %r10d
+; AVX2-NEXT: testq %rdx, %rdx
+; AVX2-NEXT: cmovnel %eax, %r10d
+; AVX2-NEXT: subl $-128, %r10d
+; AVX2-NEXT: movq %rdi, %rax
+; AVX2-NEXT: orq %rsi, %rax
+; AVX2-NEXT: cmovnel %r11d, %r10d
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %r8, %rax
+; AVX2-NEXT: xorl %ebx, %ebx
+; AVX2-NEXT: tzcntq %r9, %rbx
+; AVX2-NEXT: addl $64, %ebx
+; AVX2-NEXT: testq %r8, %r8
+; AVX2-NEXT: cmovnel %eax, %ebx
+; AVX2-NEXT: xorl %r14d, %r14d
+; AVX2-NEXT: tzcntq %r11, %r14
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %r11, %r11
+; AVX2-NEXT: cmovnel %r14d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %r9, %r8
+; AVX2-NEXT: cmovnel %ebx, %eax
+; AVX2-NEXT: addl $256, %eax # imm = 0x100
+; AVX2-NEXT: orq %rcx, %rsi
+; AVX2-NEXT: orq %rdx, %rdi
+; AVX2-NEXT: orq %rsi, %rdi
+; AVX2-NEXT: cmovnel %r10d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_cttz_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; AVX512-NEXT: tzcntq %rdi, %rax
+; AVX512-NEXT: tzcntq %rsi, %rbx
+; AVX512-NEXT: addl $64, %ebx
+; AVX512-NEXT: testq %rdi, %rdi
+; AVX512-NEXT: cmovnel %eax, %ebx
+; AVX512-NEXT: tzcntq %rdx, %rax
+; AVX512-NEXT: tzcntq %rcx, %r10
+; AVX512-NEXT: addl $64, %r10d
+; AVX512-NEXT: testq %rdx, %rdx
+; AVX512-NEXT: cmovnel %eax, %r10d
+; AVX512-NEXT: subl $-128, %r10d
+; AVX512-NEXT: movq %rdi, %rax
+; AVX512-NEXT: orq %rsi, %rax
+; AVX512-NEXT: cmovnel %ebx, %r10d
+; AVX512-NEXT: tzcntq %r8, %rax
+; AVX512-NEXT: tzcntq %r9, %rbx
+; AVX512-NEXT: addl $64, %ebx
+; AVX512-NEXT: testq %r8, %r8
+; AVX512-NEXT: cmovnel %eax, %ebx
+; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax
+; AVX512-NEXT: tzcntq %r11, %r14
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %r11, %r11
+; AVX512-NEXT: cmovnel %r14d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %r9, %r8
+; AVX512-NEXT: cmovnel %ebx, %eax
+; AVX512-NEXT: addl $256, %eax # imm = 0x100
+; AVX512-NEXT: orq %rcx, %rsi
+; AVX512-NEXT: orq %rdx, %rdi
+; AVX512-NEXT: orq %rsi, %rdi
+; AVX512-NEXT: cmovnel %r10d, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: retq
+ %cnt = call i512 @llvm.cttz.i512(i512 %a0, i1 0)
+ %res = trunc i512 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_cttz_i512(ptr %p0) nounwind {
+; SSE-LABEL: load_cttz_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq 48(%rdi), %r10
+; SSE-NEXT: movq 40(%rdi), %r9
+; SSE-NEXT: movq 24(%rdi), %r8
+; SSE-NEXT: movq 16(%rdi), %rdx
+; SSE-NEXT: movq (%rdi), %rcx
+; SSE-NEXT: movq 8(%rdi), %rsi
+; SSE-NEXT: rep bsfq %rcx, %rax
+; SSE-NEXT: rep bsfq %rsi, %rbx
+; SSE-NEXT: addl $64, %ebx
+; SSE-NEXT: testq %rcx, %rcx
+; SSE-NEXT: cmovnel %eax, %ebx
+; SSE-NEXT: rep bsfq %rdx, %rax
+; SSE-NEXT: rep bsfq %r8, %r11
+; SSE-NEXT: addl $64, %r11d
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %eax, %r11d
+; SSE-NEXT: movq 32(%rdi), %r14
+; SSE-NEXT: subl $-128, %r11d
+; SSE-NEXT: movq %rcx, %rax
+; SSE-NEXT: orq %rsi, %rax
+; SSE-NEXT: cmovnel %ebx, %r11d
+; SSE-NEXT: rep bsfq %r14, %rax
+; SSE-NEXT: rep bsfq %r9, %rbx
+; SSE-NEXT: addl $64, %ebx
+; SSE-NEXT: testq %r14, %r14
+; SSE-NEXT: cmovnel %eax, %ebx
+; SSE-NEXT: rep bsfq %r10, %r15
+; SSE-NEXT: movl $64, %eax
+; SSE-NEXT: rep bsfq 56(%rdi), %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %r10, %r10
+; SSE-NEXT: cmovnel %r15d, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %r9, %r14
+; SSE-NEXT: cmovnel %ebx, %eax
+; SSE-NEXT: addl $256, %eax # imm = 0x100
+; SSE-NEXT: orq %r8, %rsi
+; SSE-NEXT: orq %rdx, %rcx
+; SSE-NEXT: orq %rsi, %rcx
+; SSE-NEXT: cmovnel %r11d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_cttz_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: movq 48(%rdi), %r10
+; AVX2-NEXT: movq 40(%rdi), %r9
+; AVX2-NEXT: movq 24(%rdi), %r8
+; AVX2-NEXT: movq 16(%rdi), %rdx
+; AVX2-NEXT: movq (%rdi), %rcx
+; AVX2-NEXT: movq 8(%rdi), %rsi
+; AVX2-NEXT: tzcntq %rcx, %rax
+; AVX2-NEXT: xorl %ebx, %ebx
+; AVX2-NEXT: tzcntq %rsi, %rbx
+; AVX2-NEXT: addl $64, %ebx
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %eax, %ebx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %rdx, %rax
+; AVX2-NEXT: tzcntq %r8, %r11
+; AVX2-NEXT: addl $64, %r11d
+; AVX2-NEXT: testq %rdx, %rdx
+; AVX2-NEXT: cmovnel %eax, %r11d
+; AVX2-NEXT: subl $-128, %r11d
+; AVX2-NEXT: movq %rcx, %rax
+; AVX2-NEXT: orq %rsi, %rax
+; AVX2-NEXT: cmovnel %ebx, %r11d
+; AVX2-NEXT: movq 32(%rdi), %rbx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %rbx, %rax
+; AVX2-NEXT: xorl %r14d, %r14d
+; AVX2-NEXT: tzcntq %r9, %r14
+; AVX2-NEXT: addl $64, %r14d
+; AVX2-NEXT: testq %rbx, %rbx
+; AVX2-NEXT: cmovnel %eax, %r14d
+; AVX2-NEXT: xorl %r15d, %r15d
+; AVX2-NEXT: tzcntq %r10, %r15
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq 56(%rdi), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %r10, %r10
+; AVX2-NEXT: cmovnel %r15d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %r9, %rbx
+; AVX2-NEXT: cmovnel %r14d, %eax
+; AVX2-NEXT: addl $256, %eax # imm = 0x100
+; AVX2-NEXT: orq %r8, %rsi
+; AVX2-NEXT: orq %rdx, %rcx
+; AVX2-NEXT: orq %rsi, %rcx
+; AVX2-NEXT: cmovnel %r11d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_cttz_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: movq 48(%rdi), %r11
+; AVX512-NEXT: movq 40(%rdi), %r9
+; AVX512-NEXT: movq 32(%rdi), %r10
+; AVX512-NEXT: movq 24(%rdi), %r8
+; AVX512-NEXT: movq 16(%rdi), %rdx
+; AVX512-NEXT: movq (%rdi), %rcx
+; AVX512-NEXT: movq 8(%rdi), %rsi
+; AVX512-NEXT: tzcntq %rcx, %rax
+; AVX512-NEXT: tzcntq %rsi, %r14
+; AVX512-NEXT: addl $64, %r14d
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: cmovnel %eax, %r14d
+; AVX512-NEXT: tzcntq %rdx, %rax
+; AVX512-NEXT: tzcntq %r8, %rbx
+; AVX512-NEXT: addl $64, %ebx
+; AVX512-NEXT: testq %rdx, %rdx
+; AVX512-NEXT: cmovnel %eax, %ebx
+; AVX512-NEXT: subl $-128, %ebx
+; AVX512-NEXT: movq %rcx, %rax
+; AVX512-NEXT: orq %rsi, %rax
+; AVX512-NEXT: cmovnel %r14d, %ebx
+; AVX512-NEXT: tzcntq %r10, %rax
+; AVX512-NEXT: tzcntq %r9, %r14
+; AVX512-NEXT: addl $64, %r14d
+; AVX512-NEXT: testq %r10, %r10
+; AVX512-NEXT: cmovnel %eax, %r14d
+; AVX512-NEXT: tzcntq 56(%rdi), %rax
+; AVX512-NEXT: tzcntq %r11, %rdi
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %r11, %r11
+; AVX512-NEXT: cmovnel %edi, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %r9, %r10
+; AVX512-NEXT: cmovnel %r14d, %eax
+; AVX512-NEXT: addl $256, %eax # imm = 0x100
+; AVX512-NEXT: orq %r8, %rsi
+; AVX512-NEXT: orq %rdx, %rcx
+; AVX512-NEXT: orq %rsi, %rcx
+; AVX512-NEXT: cmovnel %ebx, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: retq
+ %a0 = load i512, ptr %p0
+ %cnt = call i512 @llvm.cttz.i512(i512 %a0, i1 0)
+ %res = trunc i512 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @test_cttz_i1024(i1024 %a0) nounwind {
+; SSE-LABEL: test_cttz_i1024:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq %r9, %r13
+; SSE-NEXT: movq %r8, %r14
+; SSE-NEXT: movq %rcx, %rbx
+; SSE-NEXT: movq %rdx, %r10
+; SSE-NEXT: movq %rsi, %r9
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: rep bsfq %rdi, %rax
+; SSE-NEXT: rep bsfq %r9, %r15
+; SSE-NEXT: addl $64, %r15d
+; SSE-NEXT: testq %rdi, %rdi
+; SSE-NEXT: cmovnel %eax, %r15d
+; SSE-NEXT: rep bsfq %r10, %r12
+; SSE-NEXT: rep bsfq %rcx, %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %r10, %r10
+; SSE-NEXT: cmovnel %r12d, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: movq %rdi, %r12
+; SSE-NEXT: orq %r9, %r12
+; SSE-NEXT: cmovnel %r15d, %eax
+; SSE-NEXT: rep bsfq %r8, %r15
+; SSE-NEXT: movq %r13, %rcx
+; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: rep bsfq %r13, %r13
+; SSE-NEXT: addl $64, %r13d
+; SSE-NEXT: testq %r8, %r8
+; SSE-NEXT: cmovnel %r15d, %r13d
+; SSE-NEXT: rep bsfq %rdx, %r12
+; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %r15
+; SSE-NEXT: addl $64, %r15d
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %r12d, %r15d
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r12
+; SSE-NEXT: subl $-128, %r15d
+; SSE-NEXT: movq %r8, %rbp
+; SSE-NEXT: orq %rcx, %rbp
+; SSE-NEXT: cmovnel %r13d, %r15d
+; SSE-NEXT: addl $256, %r15d # imm = 0x100
+; SSE-NEXT: movq %r9, %r13
+; SSE-NEXT: orq %rbx, %r13
+; SSE-NEXT: movq %rdi, %rbp
+; SSE-NEXT: orq %r10, %rbp
+; SSE-NEXT: orq %r13, %rbp
+; SSE-NEXT: cmovnel %eax, %r15d
+; SSE-NEXT: rep bsfq %r11, %r13
+; SSE-NEXT: rep bsfq %r12, %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %r11, %r11
+; SSE-NEXT: cmovnel %r13d, %eax
+; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %r13
+; SSE-NEXT: addl $64, %r13d
+; SSE-NEXT: rep bsfq %rsi, %rcx
+; SSE-NEXT: testq %rsi, %rsi
+; SSE-NEXT: cmovnel %ecx, %r13d
+; SSE-NEXT: subl $-128, %r13d
+; SSE-NEXT: movq %r11, %rcx
+; SSE-NEXT: orq %r12, %rcx
+; SSE-NEXT: cmovnel %eax, %r13d
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; SSE-NEXT: rep bsfq %rbp, %rcx
+; SSE-NEXT: addl $64, %ecx
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: rep bsfq %rdx, %rax
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %eax, %ecx
+; SSE-NEXT: movl $64, %eax
+; SSE-NEXT: rep bsfq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; SSE-NEXT: rep bsfq %r8, %rsi
+; SSE-NEXT: testq %r8, %r8
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %rbp, %rdx
+; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r12
+; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r11
+; SSE-NEXT: addl $256, %eax # imm = 0x100
+; SSE-NEXT: orq %r12, %r11
+; SSE-NEXT: cmovnel %r13d, %eax
+; SSE-NEXT: orq {{[0-9]+}}(%rsp), %rbx
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; SSE-NEXT: orq %rbx, %r9
+; SSE-NEXT: orq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: orq %r14, %rdi
+; SSE-NEXT: orq %r10, %rdi
+; SSE-NEXT: addl $512, %eax # imm = 0x200
+; SSE-NEXT: orq %r9, %rdi
+; SSE-NEXT: cmovnel %r15d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_cttz_i1024:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: movq %r9, %rbx
+; AVX2-NEXT: movq %r8, %r14
+; AVX2-NEXT: movq %rcx, %r11
+; AVX2-NEXT: movq %rdx, %r10
+; AVX2-NEXT: movq %rsi, %r9
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; AVX2-NEXT: tzcntq %rdi, %rax
+; AVX2-NEXT: xorl %r15d, %r15d
+; AVX2-NEXT: tzcntq %r9, %r15
+; AVX2-NEXT: addl $64, %r15d
+; AVX2-NEXT: testq %rdi, %rdi
+; AVX2-NEXT: cmovnel %eax, %r15d
+; AVX2-NEXT: xorl %r12d, %r12d
+; AVX2-NEXT: tzcntq %r10, %r12
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %r11, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %r10, %r10
+; AVX2-NEXT: cmovnel %r12d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: movq %rdi, %r12
+; AVX2-NEXT: orq %r9, %r12
+; AVX2-NEXT: cmovnel %r15d, %eax
+; AVX2-NEXT: xorl %r15d, %r15d
+; AVX2-NEXT: tzcntq %r14, %r15
+; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: xorl %r12d, %r12d
+; AVX2-NEXT: tzcntq %rbx, %r12
+; AVX2-NEXT: addl $64, %r12d
+; AVX2-NEXT: testq %r14, %r14
+; AVX2-NEXT: cmovnel %r15d, %r12d
+; AVX2-NEXT: xorl %r13d, %r13d
+; AVX2-NEXT: tzcntq %rcx, %r13
+; AVX2-NEXT: xorl %r15d, %r15d
+; AVX2-NEXT: tzcntq %rdx, %r15
+; AVX2-NEXT: addl $64, %r15d
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %r13d, %r15d
+; AVX2-NEXT: subl $-128, %r15d
+; AVX2-NEXT: movq %r14, %r13
+; AVX2-NEXT: orq %rbx, %r13
+; AVX2-NEXT: cmovnel %r12d, %r15d
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12
+; AVX2-NEXT: addl $256, %r15d # imm = 0x100
+; AVX2-NEXT: movq %r9, %r13
+; AVX2-NEXT: orq %r11, %r13
+; AVX2-NEXT: movq %rdi, %rbp
+; AVX2-NEXT: orq %r10, %rbp
+; AVX2-NEXT: orq %r13, %rbp
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; AVX2-NEXT: cmovnel %eax, %r15d
+; AVX2-NEXT: xorl %ebp, %ebp
+; AVX2-NEXT: tzcntq %r12, %rbp
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %r13, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %r12, %r12
+; AVX2-NEXT: cmovnel %ebp, %eax
+; AVX2-NEXT: xorl %ebp, %ebp
+; AVX2-NEXT: tzcntq %r8, %rbp
+; AVX2-NEXT: addl $64, %ebp
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: tzcntq %rsi, %rcx
+; AVX2-NEXT: testq %rsi, %rsi
+; AVX2-NEXT: cmovnel %ecx, %ebp
+; AVX2-NEXT: subl $-128, %ebp
+; AVX2-NEXT: movq %r12, %rcx
+; AVX2-NEXT: orq %r13, %rcx
+; AVX2-NEXT: cmovnel %eax, %ebp
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: tzcntq %rbx, %rcx
+; AVX2-NEXT: addl $64, %ecx
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %rdx, %rax
+; AVX2-NEXT: testq %rdx, %rdx
+; AVX2-NEXT: cmovnel %eax, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; AVX2-NEXT: tzcntq %r8, %rsi
+; AVX2-NEXT: testq %r8, %r8
+; AVX2-NEXT: cmovnel %esi, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %rbx, %rdx
+; AVX2-NEXT: cmovnel %ecx, %eax
+; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r13
+; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r12
+; AVX2-NEXT: addl $256, %eax # imm = 0x100
+; AVX2-NEXT: orq %r13, %r12
+; AVX2-NEXT: cmovnel %ebp, %eax
+; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r11
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; AVX2-NEXT: orq %r11, %r9
+; AVX2-NEXT: orq {{[0-9]+}}(%rsp), %r10
+; AVX2-NEXT: orq %r14, %rdi
+; AVX2-NEXT: orq %r10, %rdi
+; AVX2-NEXT: addl $512, %eax # imm = 0x200
+; AVX2-NEXT: orq %r9, %rdi
+; AVX2-NEXT: cmovnel %r15d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_cttz_i1024:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: movq %r9, %r14
+; AVX512-NEXT: movq %r8, %r15
+; AVX512-NEXT: movq %rcx, %r11
+; AVX512-NEXT: movq %rdx, %r10
+; AVX512-NEXT: movq %rsi, %r9
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; AVX512-NEXT: tzcntq %rdi, %rax
+; AVX512-NEXT: tzcntq %r9, %r12
+; AVX512-NEXT: addl $64, %r12d
+; AVX512-NEXT: testq %rdi, %rdi
+; AVX512-NEXT: cmovnel %eax, %r12d
+; AVX512-NEXT: tzcntq %rdx, %r13
+; AVX512-NEXT: tzcntq %r11, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rdx, %rdx
+; AVX512-NEXT: cmovnel %r13d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: movq %rdi, %r13
+; AVX512-NEXT: orq %r9, %r13
+; AVX512-NEXT: cmovnel %r12d, %eax
+; AVX512-NEXT: tzcntq %r8, %r12
+; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: tzcntq %r14, %r13
+; AVX512-NEXT: addl $64, %r13d
+; AVX512-NEXT: testq %r8, %r8
+; AVX512-NEXT: cmovnel %r12d, %r13d
+; AVX512-NEXT: tzcntq %rcx, %rbp
+; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %r12
+; AVX512-NEXT: addl $64, %r12d
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: cmovnel %ebp, %r12d
+; AVX512-NEXT: subl $-128, %r12d
+; AVX512-NEXT: movq %r8, %rbp
+; AVX512-NEXT: orq %r14, %rbp
+; AVX512-NEXT: cmovnel %r13d, %r12d
+; AVX512-NEXT: addl $256, %r12d # imm = 0x100
+; AVX512-NEXT: movq %r9, %r13
+; AVX512-NEXT: orq %r11, %r13
+; AVX512-NEXT: movq %rdi, %rbp
+; AVX512-NEXT: orq %rdx, %rbp
+; AVX512-NEXT: orq %r13, %rbp
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; AVX512-NEXT: cmovnel %eax, %r12d
+; AVX512-NEXT: tzcntq %rbx, %rbp
+; AVX512-NEXT: tzcntq %r13, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rbx, %rbx
+; AVX512-NEXT: cmovnel %ebp, %eax
+; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rbp
+; AVX512-NEXT: addl $64, %ebp
+; AVX512-NEXT: tzcntq %rsi, %rcx
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: cmovnel %ecx, %ebp
+; AVX512-NEXT: subl $-128, %ebp
+; AVX512-NEXT: movq %rbx, %rcx
+; AVX512-NEXT: orq %r13, %rcx
+; AVX512-NEXT: cmovnel %eax, %ebp
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14
+; AVX512-NEXT: tzcntq %r14, %rcx
+; AVX512-NEXT: addl $64, %ecx
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; AVX512-NEXT: tzcntq %rdx, %rax
+; AVX512-NEXT: testq %rdx, %rdx
+; AVX512-NEXT: cmovnel %eax, %ecx
+; AVX512-NEXT: tzcntq {{[0-9]+}}(%rsp), %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; AVX512-NEXT: tzcntq %r8, %rsi
+; AVX512-NEXT: testq %r8, %r8
+; AVX512-NEXT: cmovnel %esi, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %r14, %rdx
+; AVX512-NEXT: cmovnel %ecx, %eax
+; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r13
+; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %rbx
+; AVX512-NEXT: addl $256, %eax # imm = 0x100
+; AVX512-NEXT: orq %r13, %rbx
+; AVX512-NEXT: cmovnel %ebp, %eax
+; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r11
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; AVX512-NEXT: orq %r11, %r9
+; AVX512-NEXT: orq {{[0-9]+}}(%rsp), %r10
+; AVX512-NEXT: orq %r15, %rdi
+; AVX512-NEXT: orq %r10, %rdi
+; AVX512-NEXT: addl $512, %eax # imm = 0x200
+; AVX512-NEXT: orq %r9, %rdi
+; AVX512-NEXT: cmovnel %r12d, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: retq
+ %cnt = call i1024 @llvm.cttz.i1024(i1024 %a0, i1 0)
+ %res = trunc i1024 %cnt to i32
+ ret i32 %res
+}
+
+define i32 @load_cttz_i1024(ptr %p0) nounwind {
+; SSE-LABEL: load_cttz_i1024:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq 88(%rdi), %r10
+; SSE-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 56(%rdi), %rcx
+; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 40(%rdi), %rsi
+; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 24(%rdi), %r9
+; SSE-NEXT: movq 16(%rdi), %r15
+; SSE-NEXT: movq (%rdi), %r8
+; SSE-NEXT: movq 8(%rdi), %r11
+; SSE-NEXT: rep bsfq %r8, %rax
+; SSE-NEXT: rep bsfq %r11, %rdx
+; SSE-NEXT: addl $64, %edx
+; SSE-NEXT: testq %r8, %r8
+; SSE-NEXT: cmovnel %eax, %edx
+; SSE-NEXT: rep bsfq %r15, %rbx
+; SSE-NEXT: rep bsfq %r9, %rax
+; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %r15, %r15
+; SSE-NEXT: cmovnel %ebx, %eax
+; SSE-NEXT: movq 32(%rdi), %rbx
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: movq %r8, %r14
+; SSE-NEXT: orq %r11, %r14
+; SSE-NEXT: cmovnel %edx, %eax
+; SSE-NEXT: rep bsfq %rbx, %rdx
+; SSE-NEXT: rep bsfq %rsi, %r12
+; SSE-NEXT: addl $64, %r12d
+; SSE-NEXT: testq %rbx, %rbx
+; SSE-NEXT: cmovnel %edx, %r12d
+; SSE-NEXT: movq 48(%rdi), %r13
+; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: rep bsfq %r13, %rdx
+; SSE-NEXT: rep bsfq %rcx, %r14
+; SSE-NEXT: addl $64, %r14d
+; SSE-NEXT: testq %r13, %r13
+; SSE-NEXT: cmovnel %edx, %r14d
+; SSE-NEXT: subl $-128, %r14d
+; SSE-NEXT: movq %rbx, %rdx
+; SSE-NEXT: orq %rsi, %rdx
+; SSE-NEXT: cmovnel %r12d, %r14d
+; SSE-NEXT: movq 72(%rdi), %r12
+; SSE-NEXT: addl $256, %r14d # imm = 0x100
+; SSE-NEXT: movq %r11, %rdx
+; SSE-NEXT: orq %r9, %rdx
+; SSE-NEXT: movq %r8, %r13
+; SSE-NEXT: orq %r15, %r13
+; SSE-NEXT: orq %rdx, %r13
+; SSE-NEXT: movq 64(%rdi), %r13
+; SSE-NEXT: cmovnel %eax, %r14d
+; SSE-NEXT: rep bsfq %r13, %rdx
+; SSE-NEXT: rep bsfq %r12, %rax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %r13, %r13
+; SSE-NEXT: cmovnel %edx, %eax
+; SSE-NEXT: rep bsfq %r10, %rbp
+; SSE-NEXT: addl $64, %ebp
+; SSE-NEXT: movq 80(%rdi), %r10
+; SSE-NEXT: rep bsfq %r10, %rcx
+; SSE-NEXT: testq %r10, %r10
+; SSE-NEXT: cmovnel %ecx, %ebp
+; SSE-NEXT: subl $-128, %ebp
+; SSE-NEXT: movq %r13, %rcx
+; SSE-NEXT: orq %r12, %rcx
+; SSE-NEXT: cmovnel %eax, %ebp
+; SSE-NEXT: movq 104(%rdi), %r9
+; SSE-NEXT: rep bsfq %r9, %rcx
+; SSE-NEXT: addl $64, %ecx
+; SSE-NEXT: movq 96(%rdi), %rdx
+; SSE-NEXT: rep bsfq %rdx, %rax
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %eax, %ecx
+; SSE-NEXT: movl $64, %eax
+; SSE-NEXT: rep bsfq 120(%rdi), %rax
+; SSE-NEXT: movq 112(%rdi), %rdi
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: rep bsfq %rdi, %rsi
+; SSE-NEXT: testq %rdi, %rdi
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: orq %r9, %rdx
+; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; SSE-NEXT: orq %r10, %r13
+; SSE-NEXT: addl $256, %eax # imm = 0x100
+; SSE-NEXT: orq %r12, %r13
+; SSE-NEXT: cmovnel %ebp, %eax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; SSE-NEXT: orq %rcx, %r11
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; SSE-NEXT: orq %rbx, %r8
+; SSE-NEXT: orq %r15, %r8
+; SSE-NEXT: addl $512, %eax # imm = 0x200
+; SSE-NEXT: orq %r11, %r8
+; SSE-NEXT: cmovnel %r14d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: load_cttz_i1024:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: movq 72(%rdi), %r14
+; AVX2-NEXT: movq 64(%rdi), %r15
+; AVX2-NEXT: movq 56(%rdi), %r9
+; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 48(%rdi), %rcx
+; AVX2-NEXT: movq 40(%rdi), %r10
+; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 32(%rdi), %rsi
+; AVX2-NEXT: movq 24(%rdi), %rbp
+; AVX2-NEXT: movq 16(%rdi), %rbx
+; AVX2-NEXT: movq (%rdi), %r8
+; AVX2-NEXT: movq 8(%rdi), %r11
+; AVX2-NEXT: tzcntq %r8, %rax
+; AVX2-NEXT: tzcntq %r11, %rdx
+; AVX2-NEXT: addl $64, %edx
+; AVX2-NEXT: testq %r8, %r8
+; AVX2-NEXT: cmovnel %eax, %edx
+; AVX2-NEXT: xorl %r12d, %r12d
+; AVX2-NEXT: tzcntq %rbx, %r12
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %rbp, %rax
+; AVX2-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rbx, %rbx
+; AVX2-NEXT: cmovnel %r12d, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: movq %r8, %r12
+; AVX2-NEXT: orq %r11, %r12
+; AVX2-NEXT: cmovnel %edx, %eax
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: tzcntq %rsi, %rdx
+; AVX2-NEXT: xorl %r13d, %r13d
+; AVX2-NEXT: tzcntq %r10, %r13
+; AVX2-NEXT: addl $64, %r13d
+; AVX2-NEXT: testq %rsi, %rsi
+; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: cmovnel %edx, %r13d
+; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: tzcntq %rcx, %rdx
+; AVX2-NEXT: xorl %r12d, %r12d
+; AVX2-NEXT: tzcntq %r9, %r12
+; AVX2-NEXT: addl $64, %r12d
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %edx, %r12d
+; AVX2-NEXT: subl $-128, %r12d
+; AVX2-NEXT: movq %rsi, %rdx
+; AVX2-NEXT: orq %r10, %rdx
+; AVX2-NEXT: cmovnel %r13d, %r12d
+; AVX2-NEXT: addl $256, %r12d # imm = 0x100
+; AVX2-NEXT: movq %r11, %rdx
+; AVX2-NEXT: orq %rbp, %rdx
+; AVX2-NEXT: movq %r8, %r13
+; AVX2-NEXT: orq %rbx, %r13
+; AVX2-NEXT: orq %rdx, %r13
+; AVX2-NEXT: cmovnel %eax, %r12d
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: tzcntq %r15, %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %r14, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %r15, %r15
+; AVX2-NEXT: cmovnel %edx, %eax
+; AVX2-NEXT: movq 88(%rdi), %rbp
+; AVX2-NEXT: xorl %r13d, %r13d
+; AVX2-NEXT: tzcntq %rbp, %r13
+; AVX2-NEXT: addl $64, %r13d
+; AVX2-NEXT: movq 80(%rdi), %r10
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: tzcntq %r10, %rcx
+; AVX2-NEXT: testq %r10, %r10
+; AVX2-NEXT: cmovnel %ecx, %r13d
+; AVX2-NEXT: subl $-128, %r13d
+; AVX2-NEXT: movq %r15, %rcx
+; AVX2-NEXT: orq %r14, %rcx
+; AVX2-NEXT: cmovnel %eax, %r13d
+; AVX2-NEXT: movq 104(%rdi), %r9
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: tzcntq %r9, %rcx
+; AVX2-NEXT: addl $64, %ecx
+; AVX2-NEXT: movq 96(%rdi), %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %rdx, %rax
+; AVX2-NEXT: testq %rdx, %rdx
+; AVX2-NEXT: cmovnel %eax, %ecx
+; AVX2-NEXT: movq 112(%rdi), %rsi
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq 120(%rdi), %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: tzcntq %rsi, %rdi
+; AVX2-NEXT: testq %rsi, %rsi
+; AVX2-NEXT: cmovnel %edi, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %r9, %rdx
+; AVX2-NEXT: cmovnel %ecx, %eax
+; AVX2-NEXT: orq %rbp, %r14
+; AVX2-NEXT: orq %r10, %r15
+; AVX2-NEXT: addl $256, %eax # imm = 0x100
+; AVX2-NEXT: orq %r14, %r15
+; AVX2-NEXT: cmovnel %r13d, %eax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; AVX2-NEXT: orq %rcx, %r11
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; AVX2-NEXT: orq %rbx, %r8
+; AVX2-NEXT: addl $512, %eax # imm = 0x200
+; AVX2-NEXT: orq %r11, %r8
+; AVX2-NEXT: cmovnel %r12d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_cttz_i1024:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: movq 88(%rdi), %rbp
+; AVX512-NEXT: movq 72(%rdi), %r15
+; AVX512-NEXT: movq 56(%rdi), %r9
+; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 48(%rdi), %rcx
+; AVX512-NEXT: movq 40(%rdi), %r10
+; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 32(%rdi), %rsi
+; AVX512-NEXT: movq 24(%rdi), %r14
+; AVX512-NEXT: movq 16(%rdi), %rbx
+; AVX512-NEXT: movq (%rdi), %r8
+; AVX512-NEXT: movq 8(%rdi), %r11
+; AVX512-NEXT: tzcntq %r8, %rax
+; AVX512-NEXT: tzcntq %r11, %rdx
+; AVX512-NEXT: addl $64, %edx
+; AVX512-NEXT: testq %r8, %r8
+; AVX512-NEXT: cmovnel %eax, %edx
+; AVX512-NEXT: tzcntq %rbx, %r12
+; AVX512-NEXT: tzcntq %r14, %rax
+; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %rbx, %rbx
+; AVX512-NEXT: cmovnel %r12d, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: movq %r8, %r12
+; AVX512-NEXT: orq %r11, %r12
+; AVX512-NEXT: cmovnel %edx, %eax
+; AVX512-NEXT: tzcntq %rsi, %rdx
+; AVX512-NEXT: tzcntq %r10, %r13
+; AVX512-NEXT: addl $64, %r13d
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: cmovnel %edx, %r13d
+; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: tzcntq %rcx, %rdx
+; AVX512-NEXT: tzcntq %r9, %r12
+; AVX512-NEXT: addl $64, %r12d
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: cmovnel %edx, %r12d
+; AVX512-NEXT: subl $-128, %r12d
+; AVX512-NEXT: movq %rsi, %rdx
+; AVX512-NEXT: orq %r10, %rdx
+; AVX512-NEXT: cmovnel %r13d, %r12d
+; AVX512-NEXT: addl $256, %r12d # imm = 0x100
+; AVX512-NEXT: movq %r11, %rdx
+; AVX512-NEXT: orq %r14, %rdx
+; AVX512-NEXT: movq %r8, %r13
+; AVX512-NEXT: orq %rbx, %r13
+; AVX512-NEXT: orq %rdx, %r13
+; AVX512-NEXT: movq 64(%rdi), %r13
+; AVX512-NEXT: cmovnel %eax, %r12d
+; AVX512-NEXT: tzcntq %r13, %rdx
+; AVX512-NEXT: tzcntq %r15, %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: testq %r13, %r13
+; AVX512-NEXT: cmovnel %edx, %eax
+; AVX512-NEXT: movq %rbp, %r14
+; AVX512-NEXT: tzcntq %rbp, %rbp
+; AVX512-NEXT: addl $64, %ebp
+; AVX512-NEXT: movq 80(%rdi), %r10
+; AVX512-NEXT: tzcntq %r10, %rcx
+; AVX512-NEXT: testq %r10, %r10
+; AVX512-NEXT: cmovnel %ecx, %ebp
+; AVX512-NEXT: subl $-128, %ebp
+; AVX512-NEXT: movq %r13, %rcx
+; AVX512-NEXT: orq %r15, %rcx
+; AVX512-NEXT: cmovnel %eax, %ebp
+; AVX512-NEXT: movq 104(%rdi), %r9
+; AVX512-NEXT: tzcntq %r9, %rcx
+; AVX512-NEXT: addl $64, %ecx
+; AVX512-NEXT: movq 96(%rdi), %rdx
+; AVX512-NEXT: tzcntq %rdx, %rax
+; AVX512-NEXT: testq %rdx, %rdx
+; AVX512-NEXT: cmovnel %eax, %ecx
+; AVX512-NEXT: movq 112(%rdi), %rsi
+; AVX512-NEXT: tzcntq 120(%rdi), %rax
+; AVX512-NEXT: addl $64, %eax
+; AVX512-NEXT: tzcntq %rsi, %rdi
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: cmovnel %edi, %eax
+; AVX512-NEXT: subl $-128, %eax
+; AVX512-NEXT: orq %r9, %rdx
+; AVX512-NEXT: cmovnel %ecx, %eax
+; AVX512-NEXT: orq %r14, %r15
+; AVX512-NEXT: orq %r10, %r13
+; AVX512-NEXT: addl $256, %eax # imm = 0x100
+; AVX512-NEXT: orq %r15, %r13
+; AVX512-NEXT: cmovnel %ebp, %eax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; AVX512-NEXT: orq %rcx, %r11
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; AVX512-NEXT: orq %rbx, %r8
+; AVX512-NEXT: addl $512, %eax # imm = 0x200
+; AVX512-NEXT: orq %r11, %r8
+; AVX512-NEXT: cmovnel %r12d, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: retq
+ %a0 = load i1024, ptr %p0
+ %cnt = call i1024 @llvm.cttz.i1024(i1024 %a0, i1 0)
+ %res = trunc i1024 %cnt to i32
+ ret i32 %res
+}
diff --git a/llvm/test/Instrumentation/AllocToken/intrinsic.ll b/llvm/test/Instrumentation/AllocToken/intrinsic.ll
new file mode 100644
index 0000000..13aaa90
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/intrinsic.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; Test that the alloc-token pass lowers the intrinsic to a constant token ID.
+;
+; RUN: opt < %s -passes=alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i64 @llvm.alloc.token.id.i64(metadata)
+
+define i64 @test_intrinsic_lowering() {
+; CHECK-LABEL: define i64 @test_intrinsic_lowering() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %token_no_ptr = call i64 @llvm.alloc.token.id.i64(metadata !0)
+ ret i64 %token_no_ptr
+}
+
+define i64 @test_intrinsic_lowering_ptr() {
+; CHECK-LABEL: define i64 @test_intrinsic_lowering_ptr() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i64 1
+;
+entry:
+ %token_with_ptr = call i64 @llvm.alloc.token.id.i64(metadata !1)
+ ret i64 %token_with_ptr
+}
+
+!0 = !{!"NoPointerType", i1 false}
+!1 = !{!"PointerType", i1 true}
diff --git a/llvm/test/Instrumentation/AllocToken/intrinsic32.ll b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll
new file mode 100644
index 0000000..eb5dbbe
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; Test that the alloc-token pass lowers the intrinsic to a constant token ID.
+;
+; RUN: opt < %s -passes=alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+declare i32 @llvm.alloc.token.id.i32(metadata)
+
+define i32 @test_intrinsic_lowering() {
+; CHECK-LABEL: define i32 @test_intrinsic_lowering() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %token_no_ptr = call i32 @llvm.alloc.token.id.i32(metadata !0)
+ ret i32 %token_no_ptr
+}
+
+define i32 @test_intrinsic_lowering_ptr() {
+; CHECK-LABEL: define i32 @test_intrinsic_lowering_ptr() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 1
+;
+entry:
+ %token_with_ptr = call i32 @llvm.alloc.token.id.i32(metadata !1)
+ ret i32 %token_with_ptr
+}
+
+!0 = !{!"NoPointerType", i1 false}
+!1 = !{!"PointerType", i1 true}
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index d3b44eb..8160544 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, s4, v7, v8
-// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, v4, 0, 1
-// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
v_add_min_i32 v2, v4, 3, s2
-// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
v_add_min_i32 v2, s4, 4, v2
-// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
v_add_min_i32 v2, v4, v7, 12345
-// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_i32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_i32 v2, s4, v7, v8
-// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_i32 v2, v4, 0, 1
-// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
v_add_max_i32 v2, v4, 3, s2
-// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
v_add_max_i32 v2, s4, 4, v2
-// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
v_add_max_i32 v2, v4, v7, 12345
-// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_i32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04]
v_add_min_u32 v2, s4, v7, v8
-// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_u32 v2, v4, 0, 1
-// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
v_add_min_u32 v2, v4, 3, s2
-// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
v_add_min_u32 v2, s4, 4, v2
-// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
v_add_min_u32 v2, v4, v7, 12345
-// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_u32 v2, s4, v7, v8
-// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_u32 v2, v4, 0, 1
-// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
v_add_max_u32 v2, v4, 3, s2
-// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
v_add_max_u32 v2, s4, 4, v2
-// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
v_add_max_u32 v2, v4, v7, 12345
-// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
v_cvt_pk_bf16_f32 v5, v1, v2
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index 98d07ac..d913bd2 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, s4, v7, v8
-// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, v4, 0, 1
-// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
v_add_min_i32 v2, v4, 3, s2
-// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
v_add_min_i32 v2, s4, 4, v2
-// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
v_add_min_i32 v2, v4, v7, 12345
-// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_i32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_i32 v2, s4, v7, v8
-// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_i32 v2, v4, 0, 1
-// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
v_add_max_i32 v2, v4, 3, s2
-// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
v_add_max_i32 v2, s4, 4, v2
-// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
v_add_max_i32 v2, v4, v7, 12345
-// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
v_add_min_u32 v2, s4, v7, v8
-// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_u32 v2, v4, 0, 1
-// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
v_add_min_u32 v2, v4, 3, s2
-// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
v_add_min_u32 v2, s4, 4, v2
-// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
v_add_min_u32 v2, v4, v7, 12345
-// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_u32 v2, s4, v7, v8
-// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_u32 v2, v4, 0, 1
-// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
v_add_max_u32 v2, v4, 3, s2
-// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
v_add_max_u32 v2, s4, 4, v2
-// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
v_add_max_u32 v2, v4, v7, 12345
-// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
v_cvt_pk_bf16_f32 v5, v1, v2
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
index 29bfa54..7af0bfe5 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -237,64 +237,76 @@
# GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04
-# GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+# GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04
-# GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+# GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02
-# GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+# GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00
-# GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+# GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
-# GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+# GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04
+# GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
+
+0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04
+# GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04]
0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04
-# GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+# GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04
-# GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+# GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02
-# GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+# GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00
-# GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+# GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
-# GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+# GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04
-# GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+# GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04
-# GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+# GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02
-# GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+# GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00
-# GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+# GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
-# GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+# GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04
+# GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04]
0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04
-# GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+# GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04
-# GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+# GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02
-# GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+# GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00
-# GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+# GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
-# GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+# GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04
+# GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf
# GFX1250: v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
diff --git a/llvm/test/Transforms/InstCombine/select-and-or.ll b/llvm/test/Transforms/InstCombine/select-and-or.ll
index 453ca66..0b8eda4 100644
--- a/llvm/test/Transforms/InstCombine/select-and-or.ll
+++ b/llvm/test/Transforms/InstCombine/select-and-or.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
declare void @use(i1)
@@ -6,6 +6,10 @@ declare i1 @gen_i1()
declare <2 x i1> @gen_v2i1()
; Should not be converted to "and", which has different poison semantics.
+;.
+; CHECK: @g1 = external global i16
+; CHECK: @g2 = external global i16
+;.
define i1 @logical_and(i1 %a, i1 %b) {
; CHECK-LABEL: @logical_and(
; CHECK-NEXT: [[RES:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false
@@ -225,29 +229,29 @@ define i1 @not_not_true(i1 %x, i1 %y) {
; (!x && !y) --> !(x || y)
-define i1 @not_not_false(i1 %x, i1 %y) {
+define i1 @not_not_false(i1 %x, i1 %y) !prof !0 {
; CHECK-LABEL: @not_not_false(
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 true, i1 [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 true, i1 [[Y:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[R:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[R]]
;
%notx = xor i1 %x, true
%noty = xor i1 %y, true
- %r = select i1 %notx, i1 %noty, i1 false
+ %r = select i1 %notx, i1 %noty, i1 false, !prof !1
ret i1 %r
}
; (!x || !y) --> !(x && y)
-define i1 @not_true_not(i1 %x, i1 %y) {
+define i1 @not_true_not(i1 %x, i1 %y) !prof !0 {
; CHECK-LABEL: @not_true_not(
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 [[Y:%.*]], i1 false
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i1 [[Y:%.*]], i1 false, !prof [[PROF1]]
; CHECK-NEXT: [[R:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[R]]
;
%notx = xor i1 %x, true
%noty = xor i1 %y, true
- %r = select i1 %notx, i1 true, i1 %noty
+ %r = select i1 %notx, i1 true, i1 %noty, !prof !1
ret i1 %r
}
@@ -1348,3 +1352,12 @@ define i8 @test_logical_commuted_and_ne_a_b(i1 %other_cond, i8 %a, i8 %b) {
%select = select i1 %or.cond, i8 %a, i8 %b
ret i8 %select
}
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 2}
+;.
diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
index d88eaf8..3d97048 100644
--- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
+++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
@@ -58,15 +58,15 @@ define i1 @cond_eq_or_const(i8 %X, i8 %Y) !prof !0 {
ret i1 %res
}
-define i1 @xor_and(i1 %c, i32 %X, i32 %Y) {
+define i1 @xor_and(i1 %c, i32 %X, i32 %Y) !prof !0 {
; CHECK-LABEL: @xor_and(
; CHECK-NEXT: [[COMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 true, i1 [[COMP]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 true, i1 [[COMP]], !prof [[PROF2:![0-9]+]]
; CHECK-NEXT: ret i1 [[SEL]]
;
%comp = icmp ult i32 %X, %Y
- %sel = select i1 %c, i1 %comp, i1 false
+ %sel = select i1 %c, i1 %comp, i1 false, !prof !1
%res = xor i1 %sel, true
ret i1 %res
}
@@ -97,15 +97,15 @@ define <2 x i1> @xor_and3(<2 x i1> %c, <2 x i32> %X, <2 x i32> %Y) {
ret <2 x i1> %res
}
-define i1 @xor_or(i1 %c, i32 %X, i32 %Y) {
+define i1 @xor_or(i1 %c, i32 %X, i32 %Y) !prof !0 {
; CHECK-LABEL: @xor_or(
; CHECK-NEXT: [[COMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 [[COMP]], i1 false
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C]], i1 [[COMP]], i1 false, !prof [[PROF2]]
; CHECK-NEXT: ret i1 [[SEL]]
;
%comp = icmp ult i32 %X, %Y
- %sel = select i1 %c, i1 true, i1 %comp
+ %sel = select i1 %c, i1 true, i1 %comp, !prof !1
%res = xor i1 %sel, true
ret i1 %res
}
@@ -802,4 +802,5 @@ define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) {
;.
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 2}
;.
diff --git a/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll
new file mode 100644
index 0000000..7816781
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+@A = extern_weak global float, align 4
+
+; %same.as.v1 is a select with two phis %v1 and %phi.to.remove as the true
+; and false values, while %v1 and %phi.to.remove are actually the same.
+; Fold the selection instruction %same.as.v1 to %v1.
+define void @select_with_identical_phi(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @select_with_identical_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+ %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+ %q.load = load float, ptr %q
+ %c.load = load float, ptr %c
+ %sub = fsub float %q.load, %c.load
+ %cmp1 = fcmp olt float %sub, %v0
+ %v0.1 = select i1 %cmp1, float %sub, float %v0
+ %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+ %cmp2 = fcmp ogt float %sub, %same.as.v1
+ %v1.1 = select i1 %cmp2, float %sub, float %v1
+ %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1
+ %inc.i = add nuw nsw i32 %i, 1
+ %q.next = getelementptr inbounds i8, ptr %q, i64 4
+ %c.next = getelementptr inbounds i8, ptr %c, i64 4
+ %exitcond = icmp eq i32 %inc.i, %count
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; The difference from select_with_identical_phi() is that the true and false values in
+; %phi.to.remove.next and %v1.1 are swapped.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_2(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @select_with_identical_phi_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+ %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+ %q.load = load float, ptr %q
+ %c.load = load float, ptr %c
+ %sub = fsub float %q.load, %c.load
+ %cmp1 = fcmp olt float %sub, %v0
+ %v0.1 = select i1 %cmp1, float %sub, float %v0
+ %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+ %cmp2 = fcmp ogt float %sub, %same.as.v1
+ %v1.1 = select i1 %cmp2, float %v1, float %sub
+ %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub
+ %inc.i = add nuw nsw i32 %i, 1
+ %q.next = getelementptr inbounds i8, ptr %q, i64 4
+ %c.next = getelementptr inbounds i8, ptr %c, i64 4
+ %exitcond = icmp eq i32 %inc.i, %count
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; The difference from select_with_identical_phi() is that the true and false values in
+; same.as.v1 are swapped.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_3(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @select_with_identical_phi_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+ %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+ %q.load = load float, ptr %q
+ %c.load = load float, ptr %c
+ %sub = fsub float %q.load, %c.load
+ %cmp1 = fcmp olt float %sub, %v0
+ %v0.1 = select i1 %cmp1, float %sub, float %v0
+ %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1
+ %cmp2 = fcmp ogt float %sub, %same.as.v1
+ %v1.1 = select i1 %cmp2, float %sub, float %v1
+ %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1
+ %inc.i = add nuw nsw i32 %i, 1
+ %q.next = getelementptr inbounds i8, ptr %q, i64 4
+ %c.next = getelementptr inbounds i8, ptr %c, i64 4
+ %exitcond = icmp eq i32 %inc.i, %count
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; The difference from select_with_identical_phi() is that the true and false values in
+; %same.as.v1, %phi.to.remove.next and %v1.1 are swapped.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_4(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @select_with_identical_phi_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+ %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+ %q.load = load float, ptr %q
+ %c.load = load float, ptr %c
+ %sub = fsub float %q.load, %c.load
+ %cmp1 = fcmp olt float %sub, %v0
+ %v0.1 = select i1 %cmp1, float %sub, float %v0
+ %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1
+ %cmp2 = fcmp ogt float %sub, %same.as.v1
+ %v1.1 = select i1 %cmp2, float %v1, float %sub
+ %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub
+ %inc.i = add nuw nsw i32 %i, 1
+ %q.next = getelementptr inbounds i8, ptr %q, i64 4
+ %c.next = getelementptr inbounds i8, ptr %c, i64 4
+ %exitcond = icmp eq i32 %inc.i, %count
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 8b03db3..152f7db 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -172,6 +172,11 @@ static cl::opt<bool>
cl::desc("Print MIR2Vec vocabulary contents"),
cl::init(false));
+static cl::opt<bool>
+ PrintMIR2Vec("print-mir2vec", cl::Hidden,
+ cl::desc("Print MIR2Vec embeddings for functions"),
+ cl::init(false));
+
static cl::list<std::string> IncludeDirs("I", cl::desc("include search path"));
static cl::opt<bool> RemarksWithHotness(
@@ -775,6 +780,11 @@ static int compileModule(char **argv, LLVMContext &Context) {
PM.add(createMIR2VecVocabPrinterLegacyPass(errs()));
}
+ // Add MIR2Vec printer if requested
+ if (PrintMIR2Vec) {
+ PM.add(createMIR2VecPrinterLegacyPass(errs()));
+ }
+
PM.add(createFreeMachineFunctionPass());
} else {
if (Target->addPassesToEmitFile(PM, *OS, DwoOut ? &DwoOut->os() : nullptr,
@@ -788,6 +798,11 @@ static int compileModule(char **argv, LLVMContext &Context) {
if (PrintMIR2VecVocab) {
PM.add(createMIR2VecVocabPrinterLegacyPass(errs()));
}
+
+ // Add MIR2Vec printer if requested
+ if (PrintMIR2Vec) {
+ PM.add(createMIR2VecPrinterLegacyPass(errs()));
+ }
}
Target->getObjFileLowering()->Initialize(MMIWP->getMMI().getContext(),
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 79f0527..11eb58e 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -202,9 +202,10 @@ static alias IgnoreCaseAlias("i", desc("Alias for --ignore-case."),
aliasopt(IgnoreCase), cl::NotHidden);
static list<std::string> Name(
"name",
- desc("Find and print all debug info entries whose name (DW_AT_name "
- "attribute) matches the exact text in <pattern>. When used with the "
- "the -regex option <pattern> is interpreted as a regular expression."),
+ desc("Find and print all debug info entries whose name "
+ "(DW_AT_name/DW_AT_linkage_name attribute) matches the exact text "
+ "in <pattern>. When used with the the -regex option <pattern> is "
+ "interpreted as a regular expression."),
value_desc("pattern"), cat(DwarfDumpCategory));
static alias NameAlias("n", desc("Alias for --name"), aliasopt(Name),
cl::NotHidden);
diff --git a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp
index 58f5dcc6..3c2e963 100644
--- a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp
+++ b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp
@@ -283,7 +283,7 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInPolicyConflict) {
OnDiskGraphDB::FaultInPolicy::SingleNode);
}
-#if defined(EXPENSIVE_CHECKS)
+#if defined(EXPENSIVE_CHECKS) && !defined(_WIN32)
TEST_F(OnDiskCASTest, OnDiskGraphDBSpaceLimit) {
setMaxOnDiskCASMappingSize();
unittest::TempDir Temp("ondiskcas", /*Unique=*/true);
diff --git a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp
index 89c03b8..19ea8f5 100644
--- a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp
+++ b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp
@@ -33,13 +33,13 @@ TEST_F(OnDiskCASTest, OnDiskKeyValueDBTest) {
}
ValueType ValW = valueFromString("world");
- ArrayRef<char> Val;
+ std::optional<ArrayRef<char>> Val;
ASSERT_THAT_ERROR(DB->put(digest("hello"), ValW).moveInto(Val), Succeeded());
- EXPECT_EQ(Val, ArrayRef(ValW));
+ EXPECT_EQ(*Val, ArrayRef(ValW));
ASSERT_THAT_ERROR(
DB->put(digest("hello"), valueFromString("other")).moveInto(Val),
Succeeded());
- EXPECT_EQ(Val, ArrayRef(ValW));
+ EXPECT_EQ(*Val, ArrayRef(ValW));
{
std::optional<ArrayRef<char>> Val;
diff --git a/llvm/unittests/CodeGen/MIR2VecTest.cpp b/llvm/unittests/CodeGen/MIR2VecTest.cpp
index 11222b4..8710d6b 100644
--- a/llvm/unittests/CodeGen/MIR2VecTest.cpp
+++ b/llvm/unittests/CodeGen/MIR2VecTest.cpp
@@ -82,6 +82,9 @@ protected:
return;
}
+ // Set the data layout to match the target machine
+ M->setDataLayout(TM->createDataLayout());
+
// Create a dummy function to get subtarget info
FunctionType *FT = FunctionType::get(Type::getVoidTy(*Ctx), false);
Function *F =
@@ -96,16 +99,27 @@ protected:
}
void TearDown() override { TII = nullptr; }
-};
-// Function to find an opcode by name
-static int findOpcodeByName(const TargetInstrInfo *TII, StringRef Name) {
- for (unsigned Opcode = 1; Opcode < TII->getNumOpcodes(); ++Opcode) {
- if (TII->getName(Opcode) == Name)
- return Opcode;
+ // Find an opcode by name
+ int findOpcodeByName(StringRef Name) {
+ for (unsigned Opcode = 1; Opcode < TII->getNumOpcodes(); ++Opcode) {
+ if (TII->getName(Opcode) == Name)
+ return Opcode;
+ }
+ return -1; // Not found
}
- return -1; // Not found
-}
+
+ // Create a vocabulary with specific opcodes and embeddings
+ Expected<MIRVocabulary>
+ createTestVocab(std::initializer_list<std::pair<const char *, float>> opcodes,
+ unsigned dimension = 2) {
+ assert(TII && "TargetInstrInfo not initialized");
+ VocabMap VMap;
+ for (const auto &[name, value] : opcodes)
+ VMap[name] = Embedding(dimension, value);
+ return MIRVocabulary::create(std::move(VMap), *TII);
+ }
+};
TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) {
// Test that same base opcodes get same canonical indices
@@ -118,10 +132,8 @@ TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) {
// Create a MIRVocabulary instance to test the mapping
// Use a minimal MIRVocabulary to trigger canonical mapping construction
- VocabMap VMap;
Embedding Val = Embedding(64, 1.0f);
- VMap["ADD"] = Val;
- auto TestVocabOrErr = MIRVocabulary::create(std::move(VMap), *TII);
+ auto TestVocabOrErr = createTestVocab({{"ADD", 1.0f}}, 64);
ASSERT_TRUE(static_cast<bool>(TestVocabOrErr))
<< "Failed to create vocabulary: "
<< toString(TestVocabOrErr.takeError());
@@ -156,16 +168,16 @@ TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) {
6880u); // X86 has >6880 unique base opcodes
// Check that the embeddings for opcodes not in the vocab are zero vectors
- int Add32rrOpcode = findOpcodeByName(TII, "ADD32rr");
+ int Add32rrOpcode = findOpcodeByName("ADD32rr");
ASSERT_NE(Add32rrOpcode, -1) << "ADD32rr opcode not found";
EXPECT_TRUE(TestVocab[Add32rrOpcode].approximatelyEquals(Val));
- int Sub32rrOpcode = findOpcodeByName(TII, "SUB32rr");
+ int Sub32rrOpcode = findOpcodeByName("SUB32rr");
ASSERT_NE(Sub32rrOpcode, -1) << "SUB32rr opcode not found";
EXPECT_TRUE(
TestVocab[Sub32rrOpcode].approximatelyEquals(Embedding(64, 0.0f)));
- int Mov32rrOpcode = findOpcodeByName(TII, "MOV32rr");
+ int Mov32rrOpcode = findOpcodeByName("MOV32rr");
ASSERT_NE(Mov32rrOpcode, -1) << "MOV32rr opcode not found";
EXPECT_TRUE(
TestVocab[Mov32rrOpcode].approximatelyEquals(Embedding(64, 0.0f)));
@@ -178,9 +190,7 @@ TEST_F(MIR2VecVocabTestFixture, DeterministicMapping) {
// Create a MIRVocabulary instance to test deterministic mapping
// Use a minimal MIRVocabulary to trigger canonical mapping construction
- VocabMap VMap;
- VMap["ADD"] = Embedding(64, 1.0f);
- auto TestVocabOrErr = MIRVocabulary::create(std::move(VMap), *TII);
+ auto TestVocabOrErr = createTestVocab({{"ADD", 1.0f}}, 64);
ASSERT_TRUE(static_cast<bool>(TestVocabOrErr))
<< "Failed to create vocabulary: "
<< toString(TestVocabOrErr.takeError());
@@ -189,8 +199,6 @@ TEST_F(MIR2VecVocabTestFixture, DeterministicMapping) {
unsigned Index1 = TestVocab.getCanonicalIndexForBaseName(BaseName);
unsigned Index2 = TestVocab.getCanonicalIndexForBaseName(BaseName);
unsigned Index3 = TestVocab.getCanonicalIndexForBaseName(BaseName);
-
- EXPECT_EQ(Index1, Index2);
EXPECT_EQ(Index2, Index3);
// Test across multiple runs
@@ -202,11 +210,7 @@ TEST_F(MIR2VecVocabTestFixture, DeterministicMapping) {
// Test MIRVocabulary construction
TEST_F(MIR2VecVocabTestFixture, VocabularyConstruction) {
- VocabMap VMap;
- VMap["ADD"] = Embedding(128, 1.0f); // Dimension 128, all values 1.0
- VMap["SUB"] = Embedding(128, 2.0f); // Dimension 128, all values 2.0
-
- auto VocabOrErr = MIRVocabulary::create(std::move(VMap), *TII);
+ auto VocabOrErr = createTestVocab({{"ADD", 1.0f}, {"SUB", 2.0f}}, 128);
ASSERT_TRUE(static_cast<bool>(VocabOrErr))
<< "Failed to create vocabulary: " << toString(VocabOrErr.takeError());
auto &Vocab = *VocabOrErr;
@@ -243,4 +247,251 @@ TEST_F(MIR2VecVocabTestFixture, EmptyVocabularyCreation) {
}
}
+// Fixture for embedding related tests
+class MIR2VecEmbeddingTestFixture : public MIR2VecVocabTestFixture {
+protected:
+ std::unique_ptr<MachineModuleInfo> MMI;
+ MachineFunction *MF = nullptr;
+
+ void SetUp() override {
+ MIR2VecVocabTestFixture::SetUp();
+ // If base class setup was skipped (TII not initialized), skip derived setup
+ if (!TII)
+ GTEST_SKIP() << "Failed to get target instruction info in "
+ "the base class setup; Skipping test";
+
+ // Create a dummy function for MachineFunction
+ FunctionType *FT = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F =
+ Function::Create(FT, Function::ExternalLinkage, "test", M.get());
+
+ MMI = std::make_unique<MachineModuleInfo>(TM.get());
+ MF = &MMI->getOrCreateMachineFunction(*F);
+ }
+
+ void TearDown() override { MIR2VecVocabTestFixture::TearDown(); }
+
+ // Create a machine instruction
+ MachineInstr *createMachineInstr(MachineBasicBlock &MBB, unsigned Opcode) {
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ // Create instruction - operands don't affect opcode-based embeddings
+ MachineInstr *MI = BuildMI(MBB, MBB.end(), DebugLoc(), Desc);
+ return MI;
+ }
+
+ MachineInstr *createMachineInstr(MachineBasicBlock &MBB,
+ const char *OpcodeName) {
+ int Opcode = findOpcodeByName(OpcodeName);
+ if (Opcode == -1)
+ return nullptr;
+ return createMachineInstr(MBB, Opcode);
+ }
+
+ void createMachineInstrs(MachineBasicBlock &MBB,
+ std::initializer_list<const char *> Opcodes) {
+ for (const char *OpcodeName : Opcodes) {
+ MachineInstr *MI = createMachineInstr(MBB, OpcodeName);
+ ASSERT_TRUE(MI != nullptr);
+ }
+ }
+};
+
+// Test factory method for creating embedder
+TEST_F(MIR2VecEmbeddingTestFixture, CreateSymbolicEmbedder) {
+ auto VocabOrErr = MIRVocabulary::createDummyVocabForTest(*TII, 1);
+ ASSERT_TRUE(static_cast<bool>(VocabOrErr))
+ << "Failed to create vocabulary: " << toString(VocabOrErr.takeError());
+ auto &V = *VocabOrErr;
+ auto Emb = MIREmbedder::create(MIR2VecKind::Symbolic, *MF, V);
+ EXPECT_NE(Emb, nullptr);
+}
+
+TEST_F(MIR2VecEmbeddingTestFixture, CreateInvalidMode) {
+ auto VocabOrErr = MIRVocabulary::createDummyVocabForTest(*TII, 1);
+ ASSERT_TRUE(static_cast<bool>(VocabOrErr))
+ << "Failed to create vocabulary: " << toString(VocabOrErr.takeError());
+ auto &V = *VocabOrErr;
+ auto Result = MIREmbedder::create(static_cast<MIR2VecKind>(-1), *MF, V);
+ EXPECT_FALSE(static_cast<bool>(Result));
+}
+
+// Test SymbolicMIREmbedder with simple target opcodes
+TEST_F(MIR2VecEmbeddingTestFixture, TestSymbolicEmbedder) {
+ // Create a test vocabulary with specific values
+ auto VocabOrErr = createTestVocab(
+ {
+ {"NOOP", 1.0f}, // [1.0, 1.0, 1.0, 1.0]
+ {"RET", 2.0f}, // [2.0, 2.0, 2.0, 2.0]
+ {"TRAP", 3.0f} // [3.0, 3.0, 3.0, 3.0]
+ },
+ 4);
+ ASSERT_TRUE(static_cast<bool>(VocabOrErr))
+ << "Failed to create vocabulary: " << toString(VocabOrErr.takeError());
+ auto &Vocab = *VocabOrErr;
+ // Create a basic block using fixture's MF
+ MachineBasicBlock *MBB = MF->CreateMachineBasicBlock();
+ MF->push_back(MBB);
+
+ // Use real X86 opcodes that should exist and not be pseudo
+ auto NoopInst = createMachineInstr(*MBB, "NOOP");
+ ASSERT_TRUE(NoopInst != nullptr);
+
+ auto RetInst = createMachineInstr(*MBB, "RET64");
+ ASSERT_TRUE(RetInst != nullptr);
+
+ auto TrapInst = createMachineInstr(*MBB, "TRAP");
+ ASSERT_TRUE(TrapInst != nullptr);
+
+ // Verify these are not pseudo instructions
+ ASSERT_FALSE(NoopInst->isPseudo()) << "NOOP is marked as pseudo instruction";
+ ASSERT_FALSE(RetInst->isPseudo()) << "RET is marked as pseudo instruction";
+ ASSERT_FALSE(TrapInst->isPseudo()) << "TRAP is marked as pseudo instruction";
+
+ // Create embedder
+ auto Embedder = SymbolicMIREmbedder::create(*MF, Vocab);
+ ASSERT_TRUE(Embedder != nullptr);
+
+ // Test instruction embeddings
+ auto NoopEmb = Embedder->getMInstVector(*NoopInst);
+ auto RetEmb = Embedder->getMInstVector(*RetInst);
+ auto TrapEmb = Embedder->getMInstVector(*TrapInst);
+
+ // Verify embeddings match expected values (accounting for weight scaling)
+ float ExpectedWeight = mir2vec::OpcWeight; // Global weight from command line
+ EXPECT_TRUE(NoopEmb.approximatelyEquals(Embedding(4, 1.0f * ExpectedWeight)));
+ EXPECT_TRUE(RetEmb.approximatelyEquals(Embedding(4, 2.0f * ExpectedWeight)));
+ EXPECT_TRUE(TrapEmb.approximatelyEquals(Embedding(4, 3.0f * ExpectedWeight)));
+
+ // Test basic block embedding (should be sum of instruction embeddings)
+ auto MBBVector = Embedder->getMBBVector(*MBB);
+
+ // Expected BB vector: NOOP + RET + TRAP = [1+2+3, 1+2+3, 1+2+3, 1+2+3] *
+ // weight = [6, 6, 6, 6] * weight
+ Embedding ExpectedMBBVector(4, 6.0f * ExpectedWeight);
+ EXPECT_TRUE(MBBVector.approximatelyEquals(ExpectedMBBVector));
+
+ // Test function embedding (should equal MBB embedding since we have one MBB)
+ auto MFuncVector = Embedder->getMFunctionVector();
+ EXPECT_TRUE(MFuncVector.approximatelyEquals(ExpectedMBBVector));
+}
+
+// Test embedder with multiple basic blocks
+TEST_F(MIR2VecEmbeddingTestFixture, MultipleBasicBlocks) {
+ // Create a test vocabulary
+ auto VocabOrErr = createTestVocab({{"NOOP", 1.0f}, {"TRAP", 2.0f}});
+ ASSERT_TRUE(static_cast<bool>(VocabOrErr))
+ << "Failed to create vocabulary: " << toString(VocabOrErr.takeError());
+ auto &Vocab = *VocabOrErr;
+
+ // Create two basic blocks using fixture's MF
+ MachineBasicBlock *MBB1 = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *MBB2 = MF->CreateMachineBasicBlock();
+ MF->push_back(MBB1);
+ MF->push_back(MBB2);
+
+ createMachineInstrs(*MBB1, {"NOOP", "NOOP"});
+ createMachineInstr(*MBB2, "TRAP");
+
+ // Create embedder
+ auto Embedder = SymbolicMIREmbedder::create(*MF, Vocab);
+ ASSERT_TRUE(Embedder != nullptr);
+
+ // Test basic block embeddings
+ auto MBB1Vector = Embedder->getMBBVector(*MBB1);
+ auto MBB2Vector = Embedder->getMBBVector(*MBB2);
+
+ float ExpectedWeight = mir2vec::OpcWeight;
+ // BB1: NOOP + NOOP = 2 * ([1, 1] * weight)
+ Embedding ExpectedMBB1Vector(2, 2.0f * ExpectedWeight);
+ EXPECT_TRUE(MBB1Vector.approximatelyEquals(ExpectedMBB1Vector));
+
+ // BB2: TRAP = [2, 2] * weight
+ Embedding ExpectedMBB2Vector(2, 2.0f * ExpectedWeight);
+ EXPECT_TRUE(MBB2Vector.approximatelyEquals(ExpectedMBB2Vector));
+
+ // Function embedding: BB1 + BB2 = [2+2, 2+2] * weight = [4, 4] * weight
+ // Function embedding should be just the first BB embedding as the second BB
+ // is unreachable
+ auto MFuncVector = Embedder->getMFunctionVector();
+ EXPECT_TRUE(MFuncVector.approximatelyEquals(ExpectedMBB1Vector));
+
+ // Add a branch from BB1 to BB2 to make both reachable; now function embedding
+ // should be MBB1 + MBB2
+ MBB1->addSuccessor(MBB2);
+ auto NewMFuncVector = Embedder->getMFunctionVector(); // Recompute embeddings
+ Embedding ExpectedFuncVector = MBB1Vector + MBB2Vector;
+ EXPECT_TRUE(NewMFuncVector.approximatelyEquals(ExpectedFuncVector));
+}
+
+// Test embedder with empty basic block
+TEST_F(MIR2VecEmbeddingTestFixture, EmptyBasicBlock) {
+
+ // Create an empty basic block
+ MachineBasicBlock *MBB = MF->CreateMachineBasicBlock();
+ MF->push_back(MBB);
+
+ // Create embedder
+ auto VocabOrErr = MIRVocabulary::createDummyVocabForTest(*TII, 2);
+ ASSERT_TRUE(static_cast<bool>(VocabOrErr))
+ << "Failed to create vocabulary: " << toString(VocabOrErr.takeError());
+ auto &V = *VocabOrErr;
+ auto Embedder = SymbolicMIREmbedder::create(*MF, V);
+ ASSERT_TRUE(Embedder != nullptr);
+
+ // Test that empty BB has zero embedding
+ auto MBBVector = Embedder->getMBBVector(*MBB);
+ Embedding ExpectedBBVector(2, 0.0f);
+ EXPECT_TRUE(MBBVector.approximatelyEquals(ExpectedBBVector));
+
+ // Function embedding should also be zero
+ auto MFuncVector = Embedder->getMFunctionVector();
+ EXPECT_TRUE(MFuncVector.approximatelyEquals(ExpectedBBVector));
+}
+
+// Test embedder with opcodes not in vocabulary
+TEST_F(MIR2VecEmbeddingTestFixture, UnknownOpcodes) {
+ // Create a test vocabulary with limited entries
+ // SUB is intentionally not included
+ auto VocabOrErr = createTestVocab({{"ADD", 1.0f}});
+ ASSERT_TRUE(static_cast<bool>(VocabOrErr))
+ << "Failed to create vocabulary: " << toString(VocabOrErr.takeError());
+ auto &Vocab = *VocabOrErr;
+
+ // Create a basic block
+ MachineBasicBlock *MBB = MF->CreateMachineBasicBlock();
+ MF->push_back(MBB);
+
+ // Find opcodes
+ int AddOpcode = findOpcodeByName("ADD32rr");
+ int SubOpcode = findOpcodeByName("SUB32rr");
+
+ ASSERT_NE(AddOpcode, -1) << "ADD32rr opcode not found";
+ ASSERT_NE(SubOpcode, -1) << "SUB32rr opcode not found";
+
+ // Create instructions
+ MachineInstr *AddInstr = createMachineInstr(*MBB, AddOpcode);
+ MachineInstr *SubInstr = createMachineInstr(*MBB, SubOpcode);
+
+ // Create embedder
+ auto Embedder = SymbolicMIREmbedder::create(*MF, Vocab);
+ ASSERT_TRUE(Embedder != nullptr);
+
+ // Test instruction embeddings
+ auto AddVector = Embedder->getMInstVector(*AddInstr);
+ auto SubVector = Embedder->getMInstVector(*SubInstr);
+
+ float ExpectedWeight = mir2vec::OpcWeight;
+ // ADD should have the embedding from vocabulary
+ EXPECT_TRUE(
+ AddVector.approximatelyEquals(Embedding(2, 1.0f * ExpectedWeight)));
+
+ // SUB should have zero embedding (not in vocabulary)
+ EXPECT_TRUE(SubVector.approximatelyEquals(Embedding(2, 0.0f)));
+
+ // Basic block embedding should be ADD + SUB = [1.0, 1.0] * weight + [0.0,
+ // 0.0] = [1.0, 1.0] * weight
+ const auto &MBBVector = Embedder->getMBBVector(*MBB);
+ Embedding ExpectedBBVector(2, 1.0f * ExpectedWeight);
+ EXPECT_TRUE(MBBVector.approximatelyEquals(ExpectedBBVector));
+}
} // namespace
diff --git a/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp b/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp
index b988a78a..08b4e8f 100644
--- a/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp
@@ -54,7 +54,9 @@ protected:
return ContainerElementsMap();
ContainerElementsMap Result = SNs[0]->defs();
+#ifndef NDEBUG
const ContainerElementsMap &Deps = SNs[0]->deps();
+#endif // NDEBUG
for (size_t I = 1; I != SNs.size(); ++I) {
assert(!DepsMustMatch || SNs[I]->deps() == Deps);
diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
index 2a0f500..e108c4d 100644
--- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
@@ -33,7 +33,7 @@ TEST_F(VPDominatorTreeTest, DominanceNoRegionsTest) {
VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("VPBB2");
VPBasicBlock *VPBB3 = Plan.createVPBasicBlock("VPBB3");
VPBasicBlock *VPBB4 = Plan.createVPBasicBlock("VPBB4");
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB1, VPBB4);
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB1, VPBB4);
VPBB2->setParent(R1);
VPBB3->setParent(R1);
@@ -99,7 +99,7 @@ TEST_F(VPDominatorTreeTest, DominanceRegionsTest) {
VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("");
VPBasicBlock *R1BB3 = Plan.createVPBasicBlock("");
VPBasicBlock *R1BB4 = Plan.createVPBasicBlock("");
- VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB4, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB4);
R1BB2->setParent(R1);
R1BB3->setParent(R1);
VPBlockUtils::connectBlocks(VPBB0, R1);
@@ -112,7 +112,7 @@ TEST_F(VPDominatorTreeTest, DominanceRegionsTest) {
VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("");
VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("");
- VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2");
+ VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2);
VPBlockUtils::connectBlocks(R2BB1, R2BB2);
VPBlockUtils::connectBlocks(R1, R2);
@@ -171,12 +171,12 @@ TEST_F(VPDominatorTreeTest, DominanceRegionsTest) {
VPBasicBlock *R1BB1 = Plan.createVPBasicBlock("R1BB1");
VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("R1BB2");
VPBasicBlock *R1BB3 = Plan.createVPBasicBlock("R1BB3");
- VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB3, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB3);
VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1");
VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("R2BB2");
VPBasicBlock *R2BB3 = Plan.createVPBasicBlock("R2BB#");
- VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB3, "R2");
+ VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB3);
R2BB2->setParent(R2);
VPBlockUtils::connectBlocks(R2BB1, R2BB2);
VPBlockUtils::connectBlocks(R2BB2, R2BB1);
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index db64c75..c1791dfa 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -269,7 +269,7 @@ TEST_F(VPBasicBlockTest, getPlan) {
// VPBasicBlock is the entry into the VPlan, followed by a region.
VPBasicBlock *R1BB1 = Plan.createVPBasicBlock("");
VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("");
- VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB2);
VPBlockUtils::connectBlocks(R1BB1, R1BB2);
VPBlockUtils::connectBlocks(VPBB1, R1);
@@ -286,12 +286,12 @@ TEST_F(VPBasicBlockTest, getPlan) {
VPlan &Plan = getPlan();
VPBasicBlock *R1BB1 = Plan.createVPBasicBlock("");
VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("");
- VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB2);
VPBlockUtils::connectBlocks(R1BB1, R1BB2);
VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("");
VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("");
- VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2");
+ VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2);
VPBlockUtils::connectBlocks(R2BB1, R2BB2);
VPBasicBlock *VPBB1 = Plan.getEntry();
@@ -369,7 +369,7 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) {
VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("");
VPBasicBlock *R1BB3 = Plan.createVPBasicBlock("");
VPBasicBlock *R1BB4 = Plan.createVPBasicBlock("");
- VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB4, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB4);
R1BB2->setParent(R1);
R1BB3->setParent(R1);
VPBlockUtils::connectBlocks(VPBB0, R1);
@@ -382,7 +382,7 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) {
VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("");
VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("");
- VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2");
+ VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2);
VPBlockUtils::connectBlocks(R2BB1, R2BB2);
VPBlockUtils::connectBlocks(R1, R2);
@@ -467,12 +467,12 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) {
VPBasicBlock *R1BB1 = Plan.createVPBasicBlock("R1BB1");
VPBasicBlock *R1BB2 = Plan.createVPBasicBlock("R1BB2");
VPBasicBlock *R1BB3 = Plan.createVPBasicBlock("R1BB3");
- VPRegionBlock *R1 = Plan.createVPRegionBlock(R1BB1, R1BB3, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", R1BB1, R1BB3);
VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1");
VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("R2BB2");
VPBasicBlock *R2BB3 = Plan.createVPBasicBlock("R2BB3");
- VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB3, "R2");
+ VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB3);
R2BB2->setParent(R2);
VPBlockUtils::connectBlocks(R2BB1, R2BB2);
VPBlockUtils::connectBlocks(R2BB2, R2BB1);
@@ -537,10 +537,10 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) {
VPlan &Plan = getPlan();
VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1");
VPBasicBlock *R2BB2 = Plan.createVPBasicBlock("R2BB2");
- VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R2BB2, "R2");
+ VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R2BB2);
VPBlockUtils::connectBlocks(R2BB1, R2BB2);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(R2, R2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", R2, R2);
R2->setParent(R1);
VPBasicBlock *VPBB1 = Plan.getEntry();
@@ -590,14 +590,14 @@ TEST_F(VPBasicBlockTest, TraversingIteratorTest) {
//
VPlan &Plan = getPlan();
VPBasicBlock *R3BB1 = Plan.createVPBasicBlock("R3BB1");
- VPRegionBlock *R3 = Plan.createVPRegionBlock(R3BB1, R3BB1, "R3");
+ VPRegionBlock *R3 = Plan.createLoopRegion("R3", R3BB1, R3BB1);
VPBasicBlock *R2BB1 = Plan.createVPBasicBlock("R2BB1");
- VPRegionBlock *R2 = Plan.createVPRegionBlock(R2BB1, R3, "R2");
+ VPRegionBlock *R2 = Plan.createLoopRegion("R2", R2BB1, R3);
R3->setParent(R2);
VPBlockUtils::connectBlocks(R2BB1, R3);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(R2, R2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", R2, R2);
R2->setParent(R1);
VPBasicBlock *VPBB1 = Plan.getEntry();
@@ -687,7 +687,7 @@ TEST_F(VPBasicBlockTest, reassociateBlocks) {
VPlan &Plan = getPlan();
VPBasicBlock *VPBB1 = Plan.createVPBasicBlock("VPBB1");
VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("VPBB2");
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2);
VPBlockUtils::connectBlocks(VPBB1, R1);
auto *WidenPhi = new VPWidenPHIRecipe(nullptr);
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index c2f045b..50ad4d5 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -32,7 +32,7 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("");
VPBB2->appendRecipe(CanIV);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2);
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader());
@@ -71,7 +71,7 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
VPBB2->appendRecipe(DefI);
VPBB2->appendRecipe(BranchOnCond);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2);
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader());
@@ -117,7 +117,7 @@ TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
VPBlockUtils::connectBlocks(VPBB2, VPBB3);
VPBlockUtils::connectBlocks(VPBB3, VPBB4);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB4, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB4);
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB3->setParent(R1);
@@ -160,7 +160,7 @@ TEST_F(VPVerifierTest, VPPhiIncomingValueDoesntDominateIncomingBlock) {
auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
VPBB3->appendRecipe(CanIV);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB3, VPBB3, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB3, VPBB3);
VPBlockUtils::connectBlocks(VPBB1, VPBB2);
VPBlockUtils::connectBlocks(VPBB2, R1);
VPBlockUtils::connectBlocks(VPBB4, Plan.getScalarHeader());
@@ -200,7 +200,7 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
VPBB2->appendRecipe(CanIV);
VPBB2->appendRecipe(BranchOnCond);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2);
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBlockUtils::connectBlocks(VPBB1, R1);
@@ -237,7 +237,7 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
VPBlockUtils::connectBlocks(VPBB2, VPBB3);
VPBlockUtils::connectBlocks(VPBB2, VPBB3);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB3, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB3);
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB3->setParent(R1);
@@ -270,7 +270,7 @@ TEST_F(VPVerifierTest, BlockOutsideRegionWithParent) {
VPBB1->appendRecipe(DefI);
VPBB2->appendRecipe(BranchOnCond);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2);
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader());
@@ -302,7 +302,7 @@ TEST_F(VPVerifierTest, NonHeaderPHIInHeader) {
VPBB2->appendRecipe(IRPhi);
VPBB2->appendRecipe(BranchOnCond);
- VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1");
+ VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2);
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader());
diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn
index 5efc153..51911d7 100644
--- a/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn
@@ -47,7 +47,6 @@ static_library("Clang") {
"ClangASTImporter.cpp",
"ClangASTMetadata.cpp",
"ClangASTSource.cpp",
- "ClangDeclVendor.cpp",
"ClangExpressionDeclMap.cpp",
"ClangExpressionHelper.cpp",
"ClangExpressionParser.cpp",
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td
index f693a07..d9882cb 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td
@@ -101,6 +101,60 @@ def ClauseRequires : OpenMP_BitEnumAttr<
def ClauseRequiresAttr : OpenMP_EnumAttr<ClauseRequires, "clause_requires">;
+
+//===----------------------------------------------------------------------===//
+// clause_map_flag enum.
+//===----------------------------------------------------------------------===//
+
+def ClauseMapFlagsNone : I32BitEnumAttrCaseNone<"none">;
+def ClauseMapFlagsStorage : I32BitEnumAttrCaseBit<"storage", 0>; // alloc/release synonym
+def ClauseMapFlagsTo : I32BitEnumAttrCaseBit<"to", 1>;
+def ClauseMapFlagsFrom : I32BitEnumAttrCaseBit<"from", 2>;
+def ClauseMapFlagsAlways : I32BitEnumAttrCaseBit<"always", 3>;
+def ClauseMapFlagsDelete : I32BitEnumAttrCaseBit<"del", 4>; // 'delete' is a reserved keyword in C/C++
+def ClauseMapFlagsReturnParam : I32BitEnumAttrCaseBit<"return_param", 5>;
+def ClauseMapFlagsPrivate : I32BitEnumAttrCaseBit<"priv", 6>; // 'private' is a reserved keyword in C/C++
+def ClauseMapFlagsLiteral : I32BitEnumAttrCaseBit<"literal", 7>;
+def ClauseMapFlagsImplicit : I32BitEnumAttrCaseBit<"implicit", 8>;
+def ClauseMapFlagsClose : I32BitEnumAttrCaseBit<"close", 9>;
+def ClauseMapFlagsPresent : I32BitEnumAttrCaseBit<"present", 10>;
+def ClauseMapFlagsOMPXHold : I32BitEnumAttrCaseBit<"ompx_hold", 11>;
+def ClauseMapFlagsAttach : I32BitEnumAttrCaseBit<"attach", 12>;
+def ClauseMapFlagsAttachAlways : I32BitEnumAttrCaseBit<"attach_always", 13>;
+def ClauseMapFlagsAttachNone : I32BitEnumAttrCaseBit<"attach_none", 14>;
+def ClauseMapFlagsAttachAuto : I32BitEnumAttrCaseBit<"attach_auto", 15>;
+def ClauseMapFlagsRefPtr : I32BitEnumAttrCaseBit<"ref_ptr", 16>;
+def ClauseMapFlagsRefPtee : I32BitEnumAttrCaseBit<"ref_ptee", 17>;
+def ClauseMapFlagsRefPtrPtee : I32BitEnumAttrCaseBit<"ref_ptr_ptee", 18>;
+
+def ClauseMapFlags : OpenMP_BitEnumAttr<
+ "ClauseMapFlags",
+ "Map types and modifiers tied to data maps", [
+ ClauseMapFlagsNone,
+ ClauseMapFlagsStorage,
+ ClauseMapFlagsTo,
+ ClauseMapFlagsFrom,
+ ClauseMapFlagsAlways,
+ ClauseMapFlagsDelete,
+ ClauseMapFlagsReturnParam,
+ ClauseMapFlagsPrivate,
+ ClauseMapFlagsLiteral,
+ ClauseMapFlagsImplicit,
+ ClauseMapFlagsClose,
+ ClauseMapFlagsPresent,
+ ClauseMapFlagsOMPXHold,
+ ClauseMapFlagsAttach,
+ ClauseMapFlagsAttachAlways,
+ ClauseMapFlagsAttachNone,
+ ClauseMapFlagsAttachAuto,
+ ClauseMapFlagsRefPtr,
+ ClauseMapFlagsRefPtee,
+ ClauseMapFlagsRefPtrPtee
+ ]>;
+
+def ClauseMapFlagsAttr : OpenMP_EnumAttr<ClauseMapFlags,
+ "clause_map_flags">;
+
//===----------------------------------------------------------------------===//
// clause_task_depend enum.
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index b73091e..377f1fe 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -1198,7 +1198,7 @@ def MapBoundsOp : OpenMP_Op<"map.bounds",
def MapInfoOp : OpenMP_Op<"map.info", [AttrSizedOperandSegments]> {
let arguments = (ins OpenMP_PointerLikeType:$var_ptr,
TypeAttr:$var_type,
- UI64Attr:$map_type,
+ ClauseMapFlagsAttr:$map_type,
VariableCaptureKindAttr:$map_capture_type,
Optional<OpenMP_PointerLikeType>:$var_ptr_ptr,
Variadic<OpenMP_PointerLikeType>:$members,
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index fd4cabbad..1b069c6 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -32,7 +32,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/ADT/bit.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Support/InterleavedRange.h"
#include <cstddef>
#include <iterator>
@@ -1737,10 +1736,10 @@ static LogicalResult verifySynchronizationHint(Operation *op, uint64_t hint) {
// Parser, printer and verifier for Target
//===----------------------------------------------------------------------===//
-// Helper function to get bitwise AND of `value` and 'flag'
-static uint64_t mapTypeToBitFlag(uint64_t value,
- llvm::omp::OpenMPOffloadMappingFlags flag) {
- return value & llvm::to_underlying(flag);
+// Helper function that computes the bitwise AND of `value` and `flag` and
+// returns whether all bits of `flag` are set
+static bool mapTypeToBool(ClauseMapFlags value, ClauseMapFlags flag) {
+ return (value & flag) == flag;
}
/// Parses a map_entries map type from a string format back into its numeric
@@ -1748,10 +1747,9 @@ static uint64_t mapTypeToBitFlag(uint64_t value,
///
/// map-clause = `map_clauses ( ( `(` `always, `? `implicit, `? `ompx_hold, `?
/// `close, `? `present, `? ( `to` | `from` | `delete` `)` )+ `)` )
-static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) {
- llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
-
+static ParseResult parseMapClause(OpAsmParser &parser,
+ ClauseMapFlagsAttr &mapType) {
+ ClauseMapFlags mapTypeBits = ClauseMapFlags::none;
// This simply verifies the correct keyword is read in, the
// keyword itself is stored inside of the operation
auto parseTypeAndMod = [&]() -> ParseResult {
@@ -1760,35 +1758,64 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) {
return failure();
if (mapTypeMod == "always")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
+ mapTypeBits |= ClauseMapFlags::always;
if (mapTypeMod == "implicit")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ mapTypeBits |= ClauseMapFlags::implicit;
if (mapTypeMod == "ompx_hold")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
+ mapTypeBits |= ClauseMapFlags::ompx_hold;
if (mapTypeMod == "close")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
+ mapTypeBits |= ClauseMapFlags::close;
if (mapTypeMod == "present")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
+ mapTypeBits |= ClauseMapFlags::present;
if (mapTypeMod == "to")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+ mapTypeBits |= ClauseMapFlags::to;
if (mapTypeMod == "from")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ mapTypeBits |= ClauseMapFlags::from;
if (mapTypeMod == "tofrom")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ mapTypeBits |= ClauseMapFlags::to | ClauseMapFlags::from;
if (mapTypeMod == "delete")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
+ mapTypeBits |= ClauseMapFlags::del;
+
+ if (mapTypeMod == "storage")
+ mapTypeBits |= ClauseMapFlags::storage;
if (mapTypeMod == "return_param")
- mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+ mapTypeBits |= ClauseMapFlags::return_param;
+
+ if (mapTypeMod == "private")
+ mapTypeBits |= ClauseMapFlags::priv;
+
+ if (mapTypeMod == "literal")
+ mapTypeBits |= ClauseMapFlags::literal;
+
+ if (mapTypeMod == "attach")
+ mapTypeBits |= ClauseMapFlags::attach;
+
+ if (mapTypeMod == "attach_always")
+ mapTypeBits |= ClauseMapFlags::attach_always;
+
+ if (mapTypeMod == "attach_none")
+ mapTypeBits |= ClauseMapFlags::attach_none;
+
+ if (mapTypeMod == "attach_auto")
+ mapTypeBits |= ClauseMapFlags::attach_auto;
+
+ if (mapTypeMod == "ref_ptr")
+ mapTypeBits |= ClauseMapFlags::ref_ptr;
+
+ if (mapTypeMod == "ref_ptee")
+ mapTypeBits |= ClauseMapFlags::ref_ptee;
+
+ if (mapTypeMod == "ref_ptr_ptee")
+ mapTypeBits |= ClauseMapFlags::ref_ptr_ptee;
return success();
};
@@ -1796,9 +1823,8 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) {
if (parser.parseCommaSeparatedList(parseTypeAndMod))
return failure();
- mapType = parser.getBuilder().getIntegerAttr(
- parser.getBuilder().getIntegerType(64, /*isSigned=*/false),
- llvm::to_underlying(mapTypeBits));
+ mapType =
+ parser.getBuilder().getAttr<mlir::omp::ClauseMapFlagsAttr>(mapTypeBits);
return success();
}
@@ -1806,60 +1832,62 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) {
/// Prints a map_entries map type from its numeric value out into its string
/// format.
static void printMapClause(OpAsmPrinter &p, Operation *op,
- IntegerAttr mapType) {
- uint64_t mapTypeBits = mapType.getUInt();
-
- bool emitAllocRelease = true;
+ ClauseMapFlagsAttr mapType) {
llvm::SmallVector<std::string, 4> mapTypeStrs;
+ ClauseMapFlags mapFlags = mapType.getValue();
// handling of always, close, present placed at the beginning of the string
// to aid readability
- if (mapTypeToBitFlag(mapTypeBits,
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS))
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::always))
mapTypeStrs.push_back("always");
- if (mapTypeToBitFlag(mapTypeBits,
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::implicit))
mapTypeStrs.push_back("implicit");
- if (mapTypeToBitFlag(mapTypeBits,
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD))
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::ompx_hold))
mapTypeStrs.push_back("ompx_hold");
- if (mapTypeToBitFlag(mapTypeBits,
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE))
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::close))
mapTypeStrs.push_back("close");
- if (mapTypeToBitFlag(mapTypeBits,
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT))
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::present))
mapTypeStrs.push_back("present");
// special handling of to/from/tofrom/delete and release/alloc, release +
// alloc are the abscense of one of the other flags, whereas tofrom requires
// both the to and from flag to be set.
- bool to = mapTypeToBitFlag(mapTypeBits,
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO);
- bool from = mapTypeToBitFlag(
- mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM);
- if (to && from) {
- emitAllocRelease = false;
+ bool to = mapTypeToBool(mapFlags, ClauseMapFlags::to);
+ bool from = mapTypeToBool(mapFlags, ClauseMapFlags::from);
+
+ if (to && from)
mapTypeStrs.push_back("tofrom");
- } else if (from) {
- emitAllocRelease = false;
+ else if (from)
mapTypeStrs.push_back("from");
- } else if (to) {
- emitAllocRelease = false;
+ else if (to)
mapTypeStrs.push_back("to");
- }
- if (mapTypeToBitFlag(mapTypeBits,
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE)) {
- emitAllocRelease = false;
+
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::del))
mapTypeStrs.push_back("delete");
- }
- if (mapTypeToBitFlag(
- mapTypeBits,
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM)) {
- emitAllocRelease = false;
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::return_param))
mapTypeStrs.push_back("return_param");
- }
- if (emitAllocRelease)
- mapTypeStrs.push_back("exit_release_or_enter_alloc");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::storage))
+ mapTypeStrs.push_back("storage");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::priv))
+ mapTypeStrs.push_back("private");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::literal))
+ mapTypeStrs.push_back("literal");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::attach))
+ mapTypeStrs.push_back("attach");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::attach_always))
+ mapTypeStrs.push_back("attach_always");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::attach_none))
+ mapTypeStrs.push_back("attach_none");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::attach_auto))
+ mapTypeStrs.push_back("attach_auto");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::ref_ptr))
+ mapTypeStrs.push_back("ref_ptr");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::ref_ptee))
+ mapTypeStrs.push_back("ref_ptee");
+ if (mapTypeToBool(mapFlags, ClauseMapFlags::ref_ptr_ptee))
+ mapTypeStrs.push_back("ref_ptr_ptee");
+ if (mapFlags == ClauseMapFlags::none)
+ mapTypeStrs.push_back("none");
for (unsigned int i = 0; i < mapTypeStrs.size(); ++i) {
p << mapTypeStrs[i];
@@ -1963,21 +1991,15 @@ static LogicalResult verifyMapClause(Operation *op, OperandRange mapVars) {
return emitError(op->getLoc(), "missing map operation");
if (auto mapInfoOp = mapOp.getDefiningOp<mlir::omp::MapInfoOp>()) {
- uint64_t mapTypeBits = mapInfoOp.getMapType();
-
- bool to = mapTypeToBitFlag(
- mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO);
- bool from = mapTypeToBitFlag(
- mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM);
- bool del = mapTypeToBitFlag(
- mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE);
-
- bool always = mapTypeToBitFlag(
- mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS);
- bool close = mapTypeToBitFlag(
- mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
- bool implicit = mapTypeToBitFlag(
- mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
+ mlir::omp::ClauseMapFlags mapTypeBits = mapInfoOp.getMapType();
+
+ bool to = mapTypeToBool(mapTypeBits, ClauseMapFlags::to);
+ bool from = mapTypeToBool(mapTypeBits, ClauseMapFlags::from);
+ bool del = mapTypeToBool(mapTypeBits, ClauseMapFlags::del);
+
+ bool always = mapTypeToBool(mapTypeBits, ClauseMapFlags::always);
+ bool close = mapTypeToBool(mapTypeBits, ClauseMapFlags::close);
+ bool implicit = mapTypeToBool(mapTypeBits, ClauseMapFlags::implicit);
if ((isa<TargetDataOp>(op) || isa<TargetOp>(op)) && del)
return emitError(op->getLoc(),
diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp
index 4d81918..776b5c6 100644
--- a/mlir/lib/IR/Diagnostics.cpp
+++ b/mlir/lib/IR/Diagnostics.cpp
@@ -378,10 +378,8 @@ struct SourceMgrDiagnosticHandlerImpl {
}
// Otherwise, try to load the source file.
- auto bufferOrErr = llvm::MemoryBuffer::getFile(filename);
- if (!bufferOrErr)
- return 0;
- unsigned id = mgr.AddNewSourceBuffer(std::move(*bufferOrErr), SMLoc());
+ std::string ignored;
+ unsigned id = mgr.AddIncludeFile(std::string(filename), SMLoc(), ignored);
filenameToBufId[filename] = id;
return id;
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8de49dd..b851414 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3833,6 +3833,58 @@ static llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
+// Convert the MLIR map flag set to the runtime map flag set for embedding
+// in LLVM-IR. This is important as the two bit-flag lists do not correspond
+// 1-to-1, as there are flags the runtime doesn't care about and vice versa.
+// Certain flags are discarded here such as RefPtee and co.
+static llvm::omp::OpenMPOffloadMappingFlags
+convertClauseMapFlags(omp::ClauseMapFlags mlirFlags) {
+ auto mapTypeToBool = [&mlirFlags](omp::ClauseMapFlags flag) {
+ return (mlirFlags & flag) == flag;
+ };
+
+ llvm::omp::OpenMPOffloadMappingFlags mapType =
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::to))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::from))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::always))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::del))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::return_param))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::priv))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::literal))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::implicit))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::close))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::present))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::ompx_hold))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
+
+ if (mapTypeToBool(omp::ClauseMapFlags::attach))
+ mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;
+
+ return mapType;
+}
+
static void collectMapDataFromMapOperands(
MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
@@ -3880,8 +3932,7 @@ static void collectMapDataFromMapOperands(
getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
mapData.BaseType.back(), builder, moduleTranslation));
mapData.MapClause.push_back(mapOp.getOperation());
- mapData.Types.push_back(
- llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType()));
+ mapData.Types.push_back(convertClauseMapFlags(mapOp.getMapType()));
mapData.Names.push_back(LLVM::createMappingInformation(
mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
@@ -3950,8 +4001,7 @@ static void collectMapDataFromMapOperands(
Value offloadPtr =
mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
- auto mapType =
- static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType());
+ auto mapType = convertClauseMapFlags(mapOp.getMapType());
auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
mapData.OriginalValue.push_back(origValue);
@@ -4299,8 +4349,7 @@ static void processMapMembersWithParent(
// in part as we currently have substantially less information on the data
// being mapped at this stage.
if (checkIfPointerMap(memberClause)) {
- auto mapFlag =
- llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
+ auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
@@ -4319,8 +4368,7 @@ static void processMapMembersWithParent(
// Same MemberOfFlag to indicate its link with parent and other members
// of.
- auto mapFlag =
- llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
+ auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
index 7d8ccd9..f2fbe91 100644
--- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
@@ -216,22 +216,22 @@ func.func @task_depend(%arg0: !llvm.ptr) {
// CHECK: (%[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr, %[[ARG2:.*]]: !llvm.ptr, %[[ARG3:.*]]: !llvm.ptr)
// CHECK: %[[MAP0:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""}
// CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""}
-// CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG2]] : !llvm.ptr, i32) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""}
+// CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG2]] : !llvm.ptr, i32) map_clauses(always, storage) capture(ByRef) -> !llvm.ptr {name = ""}
// CHECK: omp.target_enter_data map_entries(%[[MAP0]], %[[MAP1]], %[[MAP2]] : !llvm.ptr, !llvm.ptr, !llvm.ptr)
// CHECK: %[[MAP3:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""}
// CHECK: %[[MAP4:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""}
-// CHECK: %[[MAP5:.*]] = omp.map.info var_ptr(%[[ARG2]] : !llvm.ptr, i32) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""}
+// CHECK: %[[MAP5:.*]] = omp.map.info var_ptr(%[[ARG2]] : !llvm.ptr, i32) map_clauses(storage) capture(ByRef) -> !llvm.ptr {name = ""}
// CHECK: %[[MAP6:.*]] = omp.map.info var_ptr(%[[ARG3]] : !llvm.ptr, i32) map_clauses(always, delete) capture(ByRef) -> !llvm.ptr {name = ""}
// CHECK: omp.target_exit_data map_entries(%[[MAP3]], %[[MAP4]], %[[MAP5]], %[[MAP6]] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr)
llvm.func @_QPomp_target_data(%a : !llvm.ptr, %b : !llvm.ptr, %c : !llvm.ptr, %d : !llvm.ptr) {
%0 = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""}
%1 = omp.map.info var_ptr(%b : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""}
- %2 = omp.map.info var_ptr(%c : !llvm.ptr, i32) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""}
+ %2 = omp.map.info var_ptr(%c : !llvm.ptr, i32) map_clauses(always, storage) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target_enter_data map_entries(%0, %1, %2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {}
%3 = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""}
%4 = omp.map.info var_ptr(%b : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""}
- %5 = omp.map.info var_ptr(%c : !llvm.ptr, i32) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""}
+ %5 = omp.map.info var_ptr(%c : !llvm.ptr, i32) map_clauses(storage) capture(ByRef) -> !llvm.ptr {name = ""}
%6 = omp.map.info var_ptr(%d : !llvm.ptr, i32) map_clauses(always, delete) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target_exit_data map_entries(%3, %4, %5, %6 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {}
llvm.return
@@ -266,7 +266,7 @@ llvm.func @_QPomp_target_data_region(%a : !llvm.ptr, %i : !llvm.ptr) {
// CHECK: %[[ARG_1:.*]]: !llvm.ptr) {
// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(64 : i32) : i32
// CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
-// CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG_1]] : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = ""}
+// CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG_1]] : !llvm.ptr, i32) map_clauses(implicit, storage) capture(ByCopy) -> !llvm.ptr {name = ""}
// CHECK: omp.target thread_limit(%[[VAL_0]] : i32) map_entries(%[[MAP1]] -> %[[BB_ARG0:.*]], %[[MAP2]] -> %[[BB_ARG1:.*]] : !llvm.ptr, !llvm.ptr) {
// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(10 : i32) : i32
// CHECK: llvm.store %[[VAL_1]], %[[BB_ARG1]] : i32, !llvm.ptr
@@ -278,7 +278,7 @@ llvm.func @_QPomp_target_data_region(%a : !llvm.ptr, %i : !llvm.ptr) {
llvm.func @_QPomp_target(%a : !llvm.ptr, %i : !llvm.ptr) {
%0 = llvm.mlir.constant(64 : i32) : i32
%1 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %3 = omp.map.info var_ptr(%i : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = ""}
+ %3 = omp.map.info var_ptr(%i : !llvm.ptr, i32) map_clauses(implicit, storage) capture(ByCopy) -> !llvm.ptr {name = ""}
omp.target thread_limit(%0 : i32) map_entries(%1 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) {
%2 = llvm.mlir.constant(10 : i32) : i32
llvm.store %2, %arg1 : i32, !llvm.ptr
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index cbd863f..ac29e20 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -828,11 +828,11 @@ func.func @omp_target(%if_cond : i1, %device : si32, %num_threads : i32, %devic
// Test with optional map clause.
// CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(always, to) capture(ByRef) -> memref<?xi32> {name = ""}
// CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
- // CHECK: %[[MAP_C:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ // CHECK: %[[MAP_C:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
// CHECK: omp.target is_device_ptr(%[[VAL_4:.*]] : memref<i32>) has_device_addr(%[[MAP_A]] -> {{.*}} : memref<?xi32>) map_entries(%[[MAP_B]] -> {{.*}}, %[[MAP_C]] -> {{.*}} : memref<?xi32>, memref<?xi32>) {
%mapv0 = omp.map.info var_ptr(%device_addr : memref<?xi32>, tensor<?xi32>) map_clauses(always, to) capture(ByRef) -> memref<?xi32> {name = ""}
%mapv1 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
- %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
omp.target is_device_ptr(%device_ptr : memref<i32>) has_device_addr(%mapv0 -> %arg0 : memref<?xi32>) map_entries(%mapv1 -> %arg1, %mapv2 -> %arg2 : memref<?xi32>, memref<?xi32>) {
omp.terminator
}
@@ -868,20 +868,20 @@ func.func @omp_target_data (%if_cond : i1, %device : si32, %device_ptr: memref<i
}
// CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
- // CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ // CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
// CHECK: omp.target_data map_entries(%[[MAP_A]], %[[MAP_B]] : memref<?xi32>, memref<?xi32>)
%mapv3 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
- %mapv4 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv4 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
omp.target_data map_entries(%mapv3, %mapv4 : memref<?xi32>, memref<?xi32>) {}
- // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
// CHECK: omp.target_enter_data device(%[[VAL_1:.*]] : si32) if(%[[VAL_0:.*]]) map_entries(%[[MAP_A]] : memref<?xi32>) nowait
- %mapv5 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv5 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
omp.target_enter_data if(%if_cond) device(%device : si32) nowait map_entries(%mapv5 : memref<?xi32>)
- // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
// CHECK: omp.target_exit_data device(%[[VAL_1:.*]] : si32) if(%[[VAL_0:.*]]) map_entries(%[[MAP_A]] : memref<?xi32>) nowait
- %mapv6 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv6 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
omp.target_exit_data if(%if_cond) device(%device : si32) nowait map_entries(%mapv6 : memref<?xi32>)
// CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref<?xi32>, tensor<?xi32>) map_clauses(ompx_hold, to) capture(ByRef) -> memref<?xi32> {name = ""}
@@ -2790,13 +2790,13 @@ func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> ()
// CHECK: %[[C_12:.*]] = llvm.mlir.constant(2 : index) : i64
// CHECK: %[[C_13:.*]] = llvm.mlir.constant(2 : index) : i64
// CHECK: %[[BOUNDS1:.*]] = omp.map.bounds lower_bound(%[[C_11]] : i64) upper_bound(%[[C_10]] : i64) stride(%[[C_12]] : i64) start_idx(%[[C_13]] : i64)
- // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByCopy) mapper(@my_mapper) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""}
+ // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(ByCopy) mapper(@my_mapper) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""}
%6 = llvm.mlir.constant(9 : index) : i64
%7 = llvm.mlir.constant(1 : index) : i64
%8 = llvm.mlir.constant(2 : index) : i64
%9 = llvm.mlir.constant(2 : index) : i64
%10 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) stride(%8 : i64) start_idx(%9 : i64)
- %mapv2 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByCopy) mapper(@my_mapper) bounds(%10) -> !llvm.ptr {name = ""}
+ %mapv2 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(ByCopy) mapper(@my_mapper) bounds(%10) -> !llvm.ptr {name = ""}
// CHECK: omp.target map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : !llvm.ptr, !llvm.ptr)
omp.target map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr, !llvm.ptr) {
@@ -2806,14 +2806,14 @@ func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> ()
// CHECK: omp.target_data map_entries(%[[MAP0]], %[[MAP1]] : !llvm.ptr, !llvm.ptr)
omp.target_data map_entries(%mapv1, %mapv2 : !llvm.ptr, !llvm.ptr){}
- // CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(VLAType) bounds(%[[BOUNDS0]]) -> !llvm.ptr {name = ""}
+ // CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(VLAType) bounds(%[[BOUNDS0]]) -> !llvm.ptr {name = ""}
// CHECK: omp.target_enter_data map_entries(%[[MAP2]] : !llvm.ptr)
- %mapv3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(VLAType) bounds(%4) -> !llvm.ptr {name = ""}
+ %mapv3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(VLAType) bounds(%4) -> !llvm.ptr {name = ""}
omp.target_enter_data map_entries(%mapv3 : !llvm.ptr){}
- // CHECK: %[[MAP3:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(This) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""}
+ // CHECK: %[[MAP3:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(This) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""}
// CHECK: omp.target_exit_data map_entries(%[[MAP3]] : !llvm.ptr)
- %mapv4 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(This) bounds(%10) -> !llvm.ptr {name = ""}
+ %mapv4 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(This) bounds(%10) -> !llvm.ptr {name = ""}
omp.target_exit_data map_entries(%mapv4 : !llvm.ptr){}
return
@@ -2852,7 +2852,7 @@ func.func @omp_target_enter_update_exit_data_depend(%a: memref<?xi32>, %b: memre
// CHECK-NEXT: [[MAP2:%.*]] = omp.map.info
%map_a = omp.map.info var_ptr(%a: memref<?xi32>, tensor<?xi32>) map_clauses(to) capture(ByRef) -> memref<?xi32>
%map_b = omp.map.info var_ptr(%b: memref<?xi32>, tensor<?xi32>) map_clauses(from) capture(ByRef) -> memref<?xi32>
- %map_c = omp.map.info var_ptr(%c: memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32>
+ %map_c = omp.map.info var_ptr(%c: memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32>
// Do some work on the host that writes to 'a'
omp.task depend(taskdependout -> %a : memref<?xi32>) {
@@ -3014,7 +3014,7 @@ func.func @parallel_op_reduction_and_private(%priv_var: !llvm.ptr, %priv_var2: !
// CHECK-LABEL: omp_target_private
func.func @omp_target_private(%map1: memref<?xi32>, %map2: memref<?xi32>, %priv_var: !llvm.ptr) -> () {
%mapv1 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
- %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
// CHECK: omp.target
// CHECK-SAME: private(
@@ -3047,7 +3047,7 @@ func.func @omp_target_private(%map1: memref<?xi32>, %map2: memref<?xi32>, %priv_
// CHECK-LABEL: omp_target_private_with_map_idx
func.func @omp_target_private_with_map_idx(%map1: memref<?xi32>, %map2: memref<?xi32>, %priv_var: !llvm.ptr) -> () {
%mapv1 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
- %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
// CHECK: omp.target
@@ -3321,3 +3321,49 @@ func.func @omp_workdistribute() {
}
return
}
+
+func.func @omp_target_map_clause_type_test(%arg0 : memref<?xi32>) -> () {
+ // Test new map clause additions
+ // CHECK: %{{.*}}map_clauses(none){{.*}}
+ // CHECK: %{{.*}}map_clauses(to){{.*}}
+ // CHECK: %{{.*}}map_clauses(from){{.*}}
+ // CHECK: %{{.*}}map_clauses(tofrom){{.*}}
+ // CHECK: %{{.*}}map_clauses(storage){{.*}}
+ // CHECK: %{{.*}}map_clauses(delete){{.*}}
+ // CHECK: %{{.*}}map_clauses(return_param){{.*}}
+ // CHECK: %{{.*}}map_clauses(private){{.*}}
+ // CHECK: %{{.*}}map_clauses(literal){{.*}}
+ // CHECK: %{{.*}}map_clauses(implicit){{.*}}
+ // CHECK: %{{.*}}map_clauses(close){{.*}}
+ // CHECK: %{{.*}}map_clauses(present){{.*}}
+ // CHECK: %{{.*}}map_clauses(ompx_hold){{.*}}
+ // CHECK: %{{.*}}map_clauses(attach){{.*}}
+ // CHECK: %{{.*}}map_clauses(attach_always){{.*}}
+ // CHECK: %{{.*}}map_clauses(attach_none){{.*}}
+ // CHECK: %{{.*}}map_clauses(attach_auto){{.*}}
+ // CHECK: %{{.*}}map_clauses(ref_ptr){{.*}}
+ // CHECK: %{{.*}}map_clauses(ref_ptee){{.*}}
+ // CHECK: %{{.*}}map_clauses(ref_ptr_ptee){{.*}}
+ %mapv0 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(none) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv1 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(to) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv2 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(from) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv3 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv4 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(storage) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv5 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(delete) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv6 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(return_param) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv7 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(private) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv8 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(literal) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv9 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(implicit) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv10 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(close) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv11 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(present) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv12 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(ompx_hold) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv13 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(attach) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv14 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(attach_always) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv15 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(attach_none) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv16 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(attach_auto) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv17 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(ref_ptr) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv18 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(ref_ptee) capture(ByRef) -> memref<?xi32> {name = ""}
+ %mapv19 = omp.map.info var_ptr(%arg0 : memref<?xi32>, tensor<?xi32>) map_clauses(ref_ptr_ptee) capture(ByRef) -> memref<?xi32> {name = ""}
+
+ return
+}
diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S
index ec4d762..8935975 100644
--- a/openmp/runtime/src/z_Linux_asm.S
+++ b/openmp/runtime/src/z_Linux_asm.S
@@ -18,6 +18,7 @@
#include "kmp_config.h"
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+.att_syntax
# if defined(__ELF__) && defined(__CET__) && defined(__has_include)
# if __has_include(<cet.h>)
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 5d87e32..2d9433f 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -3092,8 +3092,12 @@ libc_support_library(
libc_support_library(
name = "__support_sincosf_utils",
- hdrs = ["src/__support/math/sincosf_utils.h"],
+ hdrs = [
+ "src/__support/math/sincosf_utils.h",
+ "src/__support/math/sincosf_float_eval.h",
+ ],
deps = [
+ ":__support_fputil_double_double",
":__support_fputil_fp_bits",
":__support_fputil_polyeval",
":__support_range_reduction",