Diffstat (limited to 'llvm')
-rw-r--r--  llvm/docs/CommandGuide/llc.rst | 6
-rw-r--r--  llvm/docs/ReleaseNotes.md | 1
-rw-r--r--  llvm/docs/SPIRVUsage.rst | 2
-rw-r--r--  llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp | 1
-rw-r--r--  llvm/include/llvm/Analysis/DominanceFrontierImpl.h | 1
-rw-r--r--  llvm/include/llvm/Bitcode/BitcodeWriter.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h | 1
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h | 1
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProf.h | 1
-rw-r--r--  llvm/include/llvm/SandboxIR/Instruction.h | 113
-rw-r--r--  llvm/include/llvm/TargetParser/AArch64TargetParser.h | 1
-rw-r--r--  llvm/include/llvm/TargetParser/RISCVISAInfo.h | 1
-rw-r--r--  llvm/include/llvm/Telemetry/Telemetry.h | 1
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1
-rw-r--r--  llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp | 2
-rw-r--r--  llvm/lib/Demangle/MicrosoftDemangle.cpp | 1
-rw-r--r--  llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h | 2
-rw-r--r--  llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h | 2
-rw-r--r--  llvm/lib/SandboxIR/Instruction.cpp | 27
-rw-r--r--  llvm/lib/Support/DeltaAlgorithm.cpp | 1
-rw-r--r--  llvm/lib/Support/MD5.cpp | 1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 18
-rw-r--r--  llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 10
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h | 1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 10
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.h | 2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp | 2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 1
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp | 23
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h | 3
-rw-r--r--  llvm/lib/Target/Mips/Mips16InstrInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXUtilities.h | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 78
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 83
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 2
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 46
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td | 3
-rw-r--r--  llvm/lib/TargetParser/RISCVISAInfo.cpp | 1
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfile.cpp | 1
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 11
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 14
-rw-r--r--  llvm/test/CodeGen/AArch64/neon-mov.ll | 128
-rw-r--r--  llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll | 68
-rw-r--r--  llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll | 2239
-rw-r--r--  llvm/test/CodeGen/LoongArch/expandmemcmp.ll | 3106
-rw-r--r--  llvm/test/CodeGen/LoongArch/memcmp.ll | 27
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll | 56
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll | 180
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll | 66
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll | 292
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll | 66
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll | 268
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll | 492
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll | 222
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll | 428
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll | 702
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll | 198
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll | 392
-rw-r--r--  llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_faddfsub_bfloat16.ll | 34
-rw-r--r--  llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_fminfmax_bfloat16.ll | 28
-rw-r--r--  llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll | 104
-rw-r--r--  llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll | 102
-rw-r--r--  llvm/test/tools/llc/save-stats.ll | 16
-rw-r--r--  llvm/tools/dsymutil/DwarfLinkerForBinary.cpp | 1
-rw-r--r--  llvm/tools/gold/gold-plugin.cpp | 1
-rw-r--r--  llvm/tools/llc/llc.cpp | 74
-rw-r--r--  llvm/tools/llvm-cfi-verify/lib/GraphBuilder.h | 1
-rw-r--r--  llvm/tools/llvm-ifs/llvm-ifs.cpp | 1
-rw-r--r--  llvm/tools/llvm-lto/llvm-lto.cpp | 3
-rw-r--r--  llvm/tools/llvm-rc/ResourceFileWriter.h | 2
-rw-r--r--  llvm/tools/llvm-rc/ResourceScriptToken.h | 1
-rw-r--r--  llvm/unittests/Analysis/IR2VecTest.cpp | 1
-rw-r--r--  llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp | 1
-rw-r--r--  llvm/unittests/Support/ParallelTest.cpp | 1
-rw-r--r--  llvm/utils/TableGen/DFAEmitter.cpp | 1
-rw-r--r--  llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp | 1
-rw-r--r--  llvm/utils/gn/secondary/bolt/lib/Rewrite/BUILD.gn | 1
-rw-r--r--  llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn | 1
-rw-r--r--  llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn | 1
-rw-r--r--  llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn | 2
-rw-r--r--  llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn | 6
-rw-r--r--  llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/objc/BUILD.gn | 2
-rw-r--r--  llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/performance/BUILD.gn | 2
-rw-r--r--  llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn | 4
-rw-r--r--  llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn | 7
92 files changed, 9440 insertions, 383 deletions
diff --git a/llvm/docs/CommandGuide/llc.rst b/llvm/docs/CommandGuide/llc.rst
index cc670f6..ffcccfb 100644
--- a/llvm/docs/CommandGuide/llc.rst
+++ b/llvm/docs/CommandGuide/llc.rst
@@ -129,6 +129,12 @@ End-user Options
Print statistics recorded by code-generation passes.
+.. option:: --save-stats, --save-stats=cwd, --save-stats=obj
+
+ Save LLVM statistics to a file in the current directory
+ (:option:`--save-stats`/"--save-stats=cwd") or the directory
+ of the output file ("--save-stats=obj") in JSON format.
+
.. option:: --time-passes
Record the amount of time needed for each pass and print a report to standard
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 23bba99..fd78c97 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -182,6 +182,7 @@ Changes to the LLVM tools
* `llvm-readelf` now dumps all hex format values in lower-case mode.
* Some code paths for supporting Python 2.7 in `llvm-lit` have been removed.
* Support for `%T` in lit has been removed.
+* Add `--save-stats` option to `llc` to save LLVM statistics to a file. Compatible with the Clang option.
* `llvm-config` gained a new flag `--quote-paths` which quotes and escapes paths
emitted on stdout, to account for spaces or other special characters in path.
diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst
index 9ecd390..5ee3d83 100644
--- a/llvm/docs/SPIRVUsage.rst
+++ b/llvm/docs/SPIRVUsage.rst
@@ -167,6 +167,8 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e
- Adds atomic add instruction on floating-point numbers.
* - ``SPV_EXT_shader_atomic_float_min_max``
- Adds atomic min and max instruction on floating-point numbers.
+ * - ``SPV_INTEL_16bit_atomics``
+ - Extends the SPV_EXT_shader_atomic_float_add and SPV_EXT_shader_atomic_float_min_max to support addition, minimum and maximum on 16-bit `bfloat16` floating-point numbers in memory.
* - ``SPV_INTEL_2d_block_io``
- Adds additional subgroup block prefetch, load, load transposed, load transformed and store instructions to read two-dimensional blocks of data from a two-dimensional region of memory, or to write two-dimensional blocks of data to a two dimensional region of memory.
* - ``SPV_INTEL_arbitrary_precision_integers``
diff --git a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp
index 15dca0a..6132149 100644
--- a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp
+++ b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp
@@ -20,7 +20,6 @@
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ThreadPool.h"
-#include <list>
#include <string>
using namespace llvm;
diff --git a/llvm/include/llvm/Analysis/DominanceFrontierImpl.h b/llvm/include/llvm/Analysis/DominanceFrontierImpl.h
index e877b2c..871dd95 100644
--- a/llvm/include/llvm/Analysis/DominanceFrontierImpl.h
+++ b/llvm/include/llvm/Analysis/DominanceFrontierImpl.h
@@ -24,7 +24,6 @@
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <set>
#include <utility>
#include <vector>
diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h
index e9b5737..1e72e84 100644
--- a/llvm/include/llvm/Bitcode/BitcodeWriter.h
+++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -19,7 +19,6 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBufferRef.h"
-#include <map>
#include <memory>
#include <string>
#include <vector>
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index b7d6e72..bd204f6 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -28,7 +28,6 @@
#include <cstdint>
#include <map>
#include <memory>
-#include <set>
#include <utility>
#include <vector>
diff --git a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h
index c571112..0a1300b 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h
@@ -17,7 +17,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/TargetParser/Triple.h"
-#include <map>
#include <memory>
#include <vector>
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h
index 4caf123..7e3fd18 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h
@@ -20,7 +20,6 @@
#include "llvm/DebugInfo/LogicalView/Core/LVSupport.h"
#include "llvm/Support/Compiler.h"
#include <limits>
-#include <list>
#include <string>
namespace llvm {
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
index 2e2619c..7897883 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
@@ -20,7 +20,6 @@
#include "llvm/DebugInfo/LogicalView/Core/LVSort.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Compiler.h"
-#include <list>
#include <map>
#include <set>
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index f964d00..be8cb92 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -44,10 +44,8 @@
#include <cassert>
#include <functional>
#include <iterator>
-#include <list>
#include <memory>
#include <optional>
-#include <set>
#include <utility>
namespace llvm {
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 8c6a8f5..a0499f7 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -26,7 +26,6 @@
#include <algorithm>
#include <cassert>
#include <functional>
-#include <list>
#include <memory>
#include <utility>
#include <vector>
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 7886478..f59ddc3 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -41,7 +41,6 @@
#include <cstddef>
#include <cstdint>
#include <cstring>
-#include <list>
#include <memory>
#include <string>
#include <system_error>
diff --git a/llvm/include/llvm/SandboxIR/Instruction.h b/llvm/include/llvm/SandboxIR/Instruction.h
index 5e369a4..d928068 100644
--- a/llvm/include/llvm/SandboxIR/Instruction.h
+++ b/llvm/include/llvm/SandboxIR/Instruction.h
@@ -1884,22 +1884,96 @@ public:
return cast<llvm::SwitchInst>(Val)->getNumCases();
}
+ template <typename LLVMCaseItT, typename BlockT, typename ConstT>
+ class CaseItImpl;
+
+ // The template helps avoid code duplication for const and non-const
+ // CaseHandle variants.
+ template <typename LLVMCaseItT, typename BlockT, typename ConstT>
+ class CaseHandleImpl {
+ Context &Ctx;
+    // NOTE: We are not wrapping an LLVM CaseHandle here because it is not
+ // default-constructible. Instead we are wrapping the LLVM CaseIt
+ // iterator, as we can always get an LLVM CaseHandle by de-referencing it.
+ LLVMCaseItT LLVMCaseIt;
+ template <typename T1, typename T2, typename T3> friend class CaseItImpl;
+
+ public:
+ CaseHandleImpl(Context &Ctx, LLVMCaseItT LLVMCaseIt)
+ : Ctx(Ctx), LLVMCaseIt(LLVMCaseIt) {}
+ ConstT *getCaseValue() const;
+ BlockT *getCaseSuccessor() const;
+ unsigned getCaseIndex() const {
+ const auto &LLVMCaseHandle = *LLVMCaseIt;
+ return LLVMCaseHandle.getCaseIndex();
+ }
+ unsigned getSuccessorIndex() const {
+ const auto &LLVMCaseHandle = *LLVMCaseIt;
+ return LLVMCaseHandle.getSuccessorIndex();
+ }
+ };
+
+ // The template helps avoid code duplication for const and non-const CaseIt
+ // variants.
+ template <typename LLVMCaseItT, typename BlockT, typename ConstT>
+ class CaseItImpl : public iterator_facade_base<
+ CaseItImpl<LLVMCaseItT, BlockT, ConstT>,
+ std::random_access_iterator_tag,
+ const CaseHandleImpl<LLVMCaseItT, BlockT, ConstT>> {
+ CaseHandleImpl<LLVMCaseItT, BlockT, ConstT> CH;
+
+ public:
+ CaseItImpl(Context &Ctx, LLVMCaseItT It) : CH(Ctx, It) {}
+ CaseItImpl(SwitchInst *SI, ptrdiff_t CaseNum)
+ : CH(SI->getContext(), llvm::SwitchInst::CaseIt(
+ cast<llvm::SwitchInst>(SI->Val), CaseNum)) {}
+ CaseItImpl &operator+=(ptrdiff_t N) {
+ CH.LLVMCaseIt += N;
+ return *this;
+ }
+ CaseItImpl &operator-=(ptrdiff_t N) {
+ CH.LLVMCaseIt -= N;
+ return *this;
+ }
+ ptrdiff_t operator-(const CaseItImpl &Other) const {
+ return CH.LLVMCaseIt - Other.CH.LLVMCaseIt;
+ }
+ bool operator==(const CaseItImpl &Other) const {
+ return CH.LLVMCaseIt == Other.CH.LLVMCaseIt;
+ }
+ bool operator<(const CaseItImpl &Other) const {
+ return CH.LLVMCaseIt < Other.CH.LLVMCaseIt;
+ }
+ const CaseHandleImpl<LLVMCaseItT, BlockT, ConstT> &operator*() const {
+ return CH;
+ }
+ };
+
using CaseHandle =
- llvm::SwitchInst::CaseHandleImpl<SwitchInst, ConstantInt, BasicBlock>;
- using ConstCaseHandle =
- llvm::SwitchInst::CaseHandleImpl<const SwitchInst, const ConstantInt,
- const BasicBlock>;
- using CaseIt = llvm::SwitchInst::CaseIteratorImpl<CaseHandle>;
- using ConstCaseIt = llvm::SwitchInst::CaseIteratorImpl<ConstCaseHandle>;
+ CaseHandleImpl<llvm::SwitchInst::CaseIt, BasicBlock, ConstantInt>;
+ using CaseIt = CaseItImpl<llvm::SwitchInst::CaseIt, BasicBlock, ConstantInt>;
+
+ using ConstCaseHandle = CaseHandleImpl<llvm::SwitchInst::ConstCaseIt,
+ const BasicBlock, const ConstantInt>;
+ using ConstCaseIt = CaseItImpl<llvm::SwitchInst::ConstCaseIt,
+ const BasicBlock, const ConstantInt>;
/// Returns a read/write iterator that points to the first case in the
/// SwitchInst.
- CaseIt case_begin() { return CaseIt(this, 0); }
- ConstCaseIt case_begin() const { return ConstCaseIt(this, 0); }
+ CaseIt case_begin() {
+ return CaseIt(Ctx, cast<llvm::SwitchInst>(Val)->case_begin());
+ }
+ ConstCaseIt case_begin() const {
+ return ConstCaseIt(Ctx, cast<llvm::SwitchInst>(Val)->case_begin());
+ }
/// Returns a read/write iterator that points one past the last in the
/// SwitchInst.
- CaseIt case_end() { return CaseIt(this, getNumCases()); }
- ConstCaseIt case_end() const { return ConstCaseIt(this, getNumCases()); }
+ CaseIt case_end() {
+ return CaseIt(Ctx, cast<llvm::SwitchInst>(Val)->case_end());
+ }
+ ConstCaseIt case_end() const {
+ return ConstCaseIt(Ctx, cast<llvm::SwitchInst>(Val)->case_end());
+ }
/// Iteration adapter for range-for loops.
iterator_range<CaseIt> cases() {
return make_range(case_begin(), case_end());
@@ -1907,22 +1981,19 @@ public:
iterator_range<ConstCaseIt> cases() const {
return make_range(case_begin(), case_end());
}
- CaseIt case_default() { return CaseIt(this, DefaultPseudoIndex); }
+ CaseIt case_default() {
+ return CaseIt(Ctx, cast<llvm::SwitchInst>(Val)->case_default());
+ }
ConstCaseIt case_default() const {
- return ConstCaseIt(this, DefaultPseudoIndex);
+ return ConstCaseIt(Ctx, cast<llvm::SwitchInst>(Val)->case_default());
}
CaseIt findCaseValue(const ConstantInt *C) {
- return CaseIt(
- this,
- const_cast<const SwitchInst *>(this)->findCaseValue(C)->getCaseIndex());
+ const llvm::ConstantInt *LLVMC = cast<llvm::ConstantInt>(C->Val);
+ return CaseIt(Ctx, cast<llvm::SwitchInst>(Val)->findCaseValue(LLVMC));
}
ConstCaseIt findCaseValue(const ConstantInt *C) const {
- ConstCaseIt I = llvm::find_if(cases(), [C](const ConstCaseHandle &Case) {
- return Case.getCaseValue() == C;
- });
- if (I != case_end())
- return I;
- return case_default();
+ const llvm::ConstantInt *LLVMC = cast<llvm::ConstantInt>(C->Val);
+ return ConstCaseIt(Ctx, cast<llvm::SwitchInst>(Val)->findCaseValue(LLVMC));
}
LLVM_ABI ConstantInt *findCaseDest(BasicBlock *BB);
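
A minimal usage sketch (not part of the patch) for the new SandboxIR case iterators above; it assumes a sandboxir::SwitchInst obtained elsewhere and relies only on cases(), getCaseIndex() and getSuccessorIndex() as declared in this hunk:

  #include "llvm/SandboxIR/Instruction.h"
  #include "llvm/Support/raw_ostream.h"

  // Walk the switch cases through the wrapper iterators; each case handle
  // dereferences the underlying llvm::SwitchInst::CaseIt on demand.
  static void dumpCases(llvm::sandboxir::SwitchInst *SI) {
    for (const auto &Case : SI->cases())
      llvm::errs() << "case #" << Case.getCaseIndex() << " -> successor #"
                   << Case.getSuccessorIndex() << "\n";
  }
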
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 7e68ad2..7da529e 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -23,7 +23,6 @@
#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/SubtargetFeature.h"
-#include <array>
#include <set>
#include <vector>
diff --git a/llvm/include/llvm/TargetParser/RISCVISAInfo.h b/llvm/include/llvm/TargetParser/RISCVISAInfo.h
index 0c308ca..20dbb60 100644
--- a/llvm/include/llvm/TargetParser/RISCVISAInfo.h
+++ b/llvm/include/llvm/TargetParser/RISCVISAInfo.h
@@ -15,7 +15,6 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/RISCVISAUtils.h"
-#include <map>
#include <set>
#include <string>
#include <vector>
diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index 708ec43..b20c7e2 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -19,7 +19,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
-#include <map>
#include <memory>
#include <optional>
#include <string>
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 4f6a19f..d656f10 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3482,7 +3482,7 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
bool IRTranslator::translateAtomicRMW(const User &U,
MachineIRBuilder &MIRBuilder) {
- if (containsBF16Type(U))
+ if (!MF->getTarget().getTargetTriple().isSPIRV() && containsBF16Type(U))
return false;
const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bbc1d73..80bbfea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -76,7 +76,6 @@
#include <cstdlib>
#include <limits>
#include <optional>
-#include <set>
#include <string>
#include <utility>
#include <vector>
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index 91b3dd5..c82edd9 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -15,8 +15,6 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/TimeProfiler.h"
-#include <map>
-
using namespace llvm;
using namespace llvm::msf;
using namespace llvm::support;
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 0aefe6e..769dbd4 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -21,7 +21,6 @@
#include "llvm/Demangle/StringViewExtras.h"
#include "llvm/Demangle/Utility.h"
-#include <array>
#include <cctype>
#include <cstdio>
#include <optional>
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
index 55442e0..50ba2f8 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
@@ -23,8 +23,6 @@
#define DEBUG_TYPE "jitlink"
-#include <list>
-
namespace llvm {
namespace jitlink {
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index 343218e..91021e4 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -21,8 +21,6 @@
#include "EHFrameSupportImpl.h"
#include "JITLinkGeneric.h"
-#include <list>
-
namespace llvm {
namespace jitlink {
diff --git a/llvm/lib/SandboxIR/Instruction.cpp b/llvm/lib/SandboxIR/Instruction.cpp
index 1a81d18..9ae4c98 100644
--- a/llvm/lib/SandboxIR/Instruction.cpp
+++ b/llvm/lib/SandboxIR/Instruction.cpp
@@ -1125,6 +1125,33 @@ void SwitchInst::setDefaultDest(BasicBlock *DefaultCase) {
cast<llvm::SwitchInst>(Val)->setDefaultDest(
cast<llvm::BasicBlock>(DefaultCase->Val));
}
+
+template <typename LLVMCaseItT, typename BlockT, typename ConstT>
+ConstT *
+SwitchInst::CaseHandleImpl<LLVMCaseItT, BlockT, ConstT>::getCaseValue() const {
+ const auto &LLVMCaseHandle = *LLVMCaseIt;
+ auto *LLVMC = Ctx.getValue(LLVMCaseHandle.getCaseValue());
+ return cast<ConstT>(LLVMC);
+}
+
+template <typename LLVMCaseItT, typename BlockT, typename ConstT>
+BlockT *
+SwitchInst::CaseHandleImpl<LLVMCaseItT, BlockT, ConstT>::getCaseSuccessor()
+ const {
+ const auto &LLVMCaseHandle = *LLVMCaseIt;
+ auto *LLVMBB = LLVMCaseHandle.getCaseSuccessor();
+ return cast<BlockT>(Ctx.getValue(LLVMBB));
+}
+
+template class SwitchInst::CaseHandleImpl<llvm::SwitchInst::CaseIt, BasicBlock,
+ ConstantInt>;
+template class SwitchInst::CaseItImpl<llvm::SwitchInst::CaseIt, BasicBlock,
+ ConstantInt>;
+template class SwitchInst::CaseHandleImpl<llvm::SwitchInst::ConstCaseIt,
+ const BasicBlock, const ConstantInt>;
+template class SwitchInst::CaseItImpl<llvm::SwitchInst::ConstCaseIt,
+ const BasicBlock, const ConstantInt>;
+
ConstantInt *SwitchInst::findCaseDest(BasicBlock *BB) {
auto *LLVMC = cast<llvm::SwitchInst>(Val)->findCaseDest(
cast<llvm::BasicBlock>(BB->Val));
diff --git a/llvm/lib/Support/DeltaAlgorithm.cpp b/llvm/lib/Support/DeltaAlgorithm.cpp
index d763cde..e91ee91 100644
--- a/llvm/lib/Support/DeltaAlgorithm.cpp
+++ b/llvm/lib/Support/DeltaAlgorithm.cpp
@@ -8,7 +8,6 @@
#include "llvm/ADT/DeltaAlgorithm.h"
#include <algorithm>
#include <iterator>
-#include <set>
using namespace llvm;
DeltaAlgorithm::~DeltaAlgorithm() = default;
diff --git a/llvm/lib/Support/MD5.cpp b/llvm/lib/Support/MD5.cpp
index 3bff4e1..32e2a2e 100644
--- a/llvm/lib/Support/MD5.cpp
+++ b/llvm/lib/Support/MD5.cpp
@@ -43,7 +43,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
-#include <array>
#include <cstdint>
#include <cstring>
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c8a038f..76a790dc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26050,7 +26050,7 @@ static SDValue performCSELCombine(SDNode *N,
// CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
// CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
- return Folded;
+ return Folded;
// CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
// if SUB(y, x) already exists and we can produce a swapped predicate for cc.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 14b0f9a..3940246 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5666,6 +5666,9 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
MachineRegisterInfo &MRI) {
LLT DstTy = MRI.getType(Dst);
unsigned DstSize = DstTy.getSizeInBits();
+ assert((DstSize == 64 || DstSize == 128) &&
+ "Unexpected vector constant size");
+
if (CV->isNullValue()) {
if (DstSize == 128) {
auto Mov =
@@ -5735,17 +5738,24 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
// Try to create the new constants with MOVI, and if so generate a fneg
// for it.
if (auto *NewOp = TryMOVIWithBits(NegBits)) {
- Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+ Register NewDst = MRI.createVirtualRegister(
+ DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
NewOp->getOperand(0).setReg(NewDst);
return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
}
return nullptr;
};
MachineInstr *R;
- if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
- (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
+ if ((R = TryWithFNeg(DefBits, 32,
+ DstSize == 64 ? AArch64::FNEGv2f32
+ : AArch64::FNEGv4f32)) ||
+ (R = TryWithFNeg(DefBits, 64,
+ DstSize == 64 ? AArch64::FNEGDr
+ : AArch64::FNEGv2f64)) ||
(STI.hasFullFP16() &&
- (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
+ (R = TryWithFNeg(DefBits, 16,
+ DstSize == 64 ? AArch64::FNEGv4f16
+ : AArch64::FNEGv8f16))))
return R;
}
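
For reference, a hedged restatement of the opcode choice the hunk above adds (pickFNegOpc is a hypothetical helper, assuming the generated AArch64 instruction enum is in scope; it is not part of the patch):

  // Mirrors the new 64-bit vs. 128-bit FNEG selection in emitConstantVector.
  static unsigned pickFNegOpc(unsigned DstSize, unsigned EltBits) {
    if (DstSize == 64)
      return EltBits == 16   ? AArch64::FNEGv4f16
             : EltBits == 32 ? AArch64::FNEGv2f32
                             : AArch64::FNEGDr;
    return EltBits == 16   ? AArch64::FNEGv8f16
           : EltBits == 32 ? AArch64::FNEGv4f32
                           : AArch64::FNEGv2f64;
  }
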
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e67db8e..b119146 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1402,7 +1402,7 @@ static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
Inst.addOperand(MCOperand::createImm(U | (imm << 4) | Rm));
} else {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(U));
}
@@ -1922,7 +1922,7 @@ static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
imm |= fieldFromInstruction(Insn, 24, 1) << 1;
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
return S;
}
@@ -3703,17 +3703,17 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
- return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(ARM::SP));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
- return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
} else if (Inst.getOpcode() == ARM::tADDspr) {
unsigned Rm = fieldFromInstruction(Insn, 3, 4);
Inst.addOperand(MCOperand::createReg(ARM::SP));
Inst.addOperand(MCOperand::createReg(ARM::SP));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
}
return S;
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
index 8707b08..f2c00c7 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
@@ -18,7 +18,6 @@
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/MemoryBufferRef.h"
-#include <map>
#include <memory>
#include <string>
#include <vector>
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 526b4de..04a9760 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3948,3 +3948,13 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
return AtomicExpansionKind::LLSC;
}
+
+bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
+ const Instruction &AndI) const {
+ // Only sink 'and' mask to cmp use block if it is masking a single bit since
+ // this will fold the and/cmp/br into a single tstbit instruction.
+ ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+ if (!Mask)
+ return false;
+ return Mask->getValue().isPowerOf2();
+}
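
A small sketch (an assumption for illustration, not code from the patch) of the pattern the new hook targets: when the mask is a single bit, the and/cmp-against-zero pair is just a bit test, which Hexagon can fold into one tstbit once the 'and' is sunk into the compare's block.

  // (A & (1u << Bit)) == 0 tests a single bit; with the mask sunk next to the
  // compare and branch, Hexagon selects a lone tstbit + conditional jump.
  static bool bitIsClear(unsigned A, unsigned Bit) {
    return (A & (1u << Bit)) == 0;
  }
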
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8d04edb..4ac3e76 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -160,6 +160,8 @@ public:
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
+ bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
/// Return true if an FMA operation is faster than a pair of mul and add
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
/// method returns true (and FMAs are legal), otherwise fmuladd is
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 6dd83c1..2ee3b9d 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -198,7 +198,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
// Reaching Def to an offset register can't be a phi.
if ((OffsetRegDN.Addr->getFlags() & NodeAttrs::PhiRef) &&
MI.getParent() != UseMI.getParent())
- return false;
+ return false;
const MCInstrDesc &UseMID = UseMI.getDesc();
if ((!UseMID.mayLoad() && !UseMID.mayStore()) ||
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index ce2de75..a3c8a88 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -28,7 +28,6 @@
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
-#include <map>
#include <optional>
using namespace llvm;
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index f548a8d..5107c8d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -111,4 +111,25 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
}
}
-// TODO: Implement more hooks to provide TTI machinery for LoongArch.
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions
+LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+
+ if (!ST->hasUAL())
+ return Options;
+
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ Options.NumLoadsPerBlock = Options.MaxNumLoads;
+ Options.AllowOverlappingLoads = true;
+
+ // TODO: Support for vectors.
+ if (ST->is64Bit()) {
+ Options.LoadSizes = {8, 4, 2, 1};
+ Options.AllowedTailExpansions = {3, 5, 6};
+ } else {
+ Options.LoadSizes = {4, 2, 1};
+ Options.AllowedTailExpansions = {3};
+ }
+
+ return Options;
+}
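
To illustrate what these options enable (a sketch under stated assumptions, not the IR the expansion pass actually emits): with unaligned 8-byte loads allowed on LA64, a 16-byte memcmp can be expanded inline into two wide compares, byte-swapped on mismatch so the result keeps memcmp's byte-wise ordering on a little-endian target.

  #include <cstdint>
  #include <cstring>

  // Rough C++ equivalent of the inline expansion of memcmp(A, B, 16):
  // two 8-byte loads per buffer, with a bswap only on the mismatching word.
  static int memcmp16Expanded(const void *A, const void *B) {
    for (int Off = 0; Off <= 8; Off += 8) {
      std::uint64_t LA, LB;
      std::memcpy(&LA, static_cast<const char *>(A) + Off, 8);
      std::memcpy(&LB, static_cast<const char *>(B) + Off, 8);
      if (LA != LB)
        return __builtin_bswap64(LA) < __builtin_bswap64(LB) ? -1 : 1;
    }
    return 0;
  }
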
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index e3f16c7..9b479f9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -55,7 +55,8 @@ public:
bool shouldExpandReduction(const IntrinsicInst *II) const override;
- // TODO: Implement more hooks to provide TTI machinery for LoongArch.
+ TTI::MemCmpExpansionOptions
+ enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 5d08f56..aa94f54 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -405,9 +405,9 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
}
if (SecondRegSaved)
copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true);
+ } else {
+ Available.reset(SpReg);
}
- else
- Available.reset(SpReg);
copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg)
.addReg(SpReg, RegState::Kill)
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index d92ae8d..21d7768 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -25,7 +25,6 @@
#include "llvm/Support/Alignment.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdarg>
-#include <set>
#include <string>
namespace llvm {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1977d33..a3ccbd8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -87,6 +87,11 @@ static cl::opt<bool>
"be combined with a shift"),
cl::init(true));
+// TODO: Support more ops
+static const unsigned ZvfbfaVPOps[] = {ISD::VP_FNEG, ISD::VP_FABS,
+ ISD::VP_FCOPYSIGN};
+static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
+
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -1208,6 +1213,61 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
+  // Sets common actions for zvfbfa; some of the instructions are supported
+  // natively, so we don't need to promote them.
+ const auto SetZvfbfaActions = [&](MVT VT) {
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+ Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+ setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
+ setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
+ ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+ ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
+ ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
+ ISD::VECTOR_COMPRESS},
+ VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
+ setOperationAction(ISD::FCOPYSIGN, VT, Legal);
+ setOperationAction(ZvfbfaVPOps, VT, Custom);
+
+ MVT EltVT = VT.getVectorElementType();
+ if (isTypeLegal(EltVT))
+ setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
+ ISD::EXTRACT_VECTOR_ELT},
+ VT, Custom);
+ else
+ setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
+ EltVT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+ ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
+ ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+ ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
+ ISD::VP_SCATTER},
+ VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
+
+ // Expand FP operations that need libcalls.
+ setOperationAction(FloatingPointLibCallOps, VT, Expand);
+
+ // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
+ if (getLMUL(VT) == RISCVVType::LMUL_8) {
+ setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+ setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
+ } else {
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
+ }
+ };
+
if (Subtarget.hasVInstructionsF16()) {
for (MVT VT : F16VecVTs) {
if (!isTypeLegal(VT))
@@ -1222,7 +1282,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget.hasVInstructionsBF16Minimal()) {
+ if (Subtarget.hasVInstructionsBF16()) {
+ for (MVT VT : BF16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+ SetZvfbfaActions(VT);
+ }
+ } else if (Subtarget.hasVInstructionsBF16Minimal()) {
for (MVT VT : BF16VecVTs) {
if (!isTypeLegal(VT))
continue;
@@ -1501,6 +1567,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// available.
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
}
+ if (Subtarget.hasStdExtZvfbfa()) {
+ setOperationAction(ZvfbfaOps, VT, Custom);
+ setOperationAction(ZvfbfaVPOps, VT, Custom);
+ }
setOperationAction(
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
Custom);
@@ -7245,7 +7315,11 @@ static bool isPromotedOpNeedingSplit(SDValue Op,
return (Op.getValueType() == MVT::nxv32f16 &&
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
- Op.getValueType() == MVT::nxv32bf16;
+ (Op.getValueType() == MVT::nxv32bf16 &&
+ Subtarget.hasVInstructionsBF16Minimal() &&
+ (!Subtarget.hasVInstructionsBF16() ||
+ (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
+ !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
}
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index b9c5b75..ffb2ac0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -701,5 +701,86 @@ let Predicates = [HasStdExtZvfbfa] in {
FRM_DYN,
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
-}
+
+ foreach vti = AllBF16Vectors in {
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg vti.RegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$passthru,
+ (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ vti.RegClass:$passthru, vti.RegClass:$rs1,
+ vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (riscv_fneg_vl vti.RegClass:$rs2,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ srcvalue,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (SplatFPOp vti.ScalarRegClass:$rs2),
+ vti.RegClass:$passthru,
+ (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ vti.RegClass:$passthru, vti.RegClass:$rs1,
+ vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ }
+ }
} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index f681b0d..ac09b93 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -29,6 +29,8 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
SPIRV::Extension::Extension::SPV_EXT_shader_atomic_float16_add},
{"SPV_EXT_shader_atomic_float_min_max",
SPIRV::Extension::Extension::SPV_EXT_shader_atomic_float_min_max},
+ {"SPV_INTEL_16bit_atomics",
+ SPIRV::Extension::Extension::SPV_INTEL_16bit_atomics},
{"SPV_EXT_arithmetic_fence",
SPIRV::Extension::Extension::SPV_EXT_arithmetic_fence},
{"SPV_EXT_demote_to_helper_invocation",
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index af76016..fbb127d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1058,6 +1058,13 @@ static void addOpTypeImageReqs(const MachineInstr &MI,
}
}
+static bool isBFloat16Type(const SPIRVType *TypeDef) {
+ return TypeDef && TypeDef->getNumOperands() == 3 &&
+ TypeDef->getOpcode() == SPIRV::OpTypeFloat &&
+ TypeDef->getOperand(1).getImm() == 16 &&
+ TypeDef->getOperand(2).getImm() == SPIRV::FPEncoding::BFloat16KHR;
+}
+
// Add requirements for handling atomic float instructions
#define ATOM_FLT_REQ_EXT_MSG(ExtName) \
"The atomic float instruction requires the following SPIR-V " \
@@ -1081,11 +1088,21 @@ static void AddAtomicFloatRequirements(const MachineInstr &MI,
Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float_add);
switch (BitWidth) {
case 16:
- if (!ST.canUseExtension(
- SPIRV::Extension::SPV_EXT_shader_atomic_float16_add))
- report_fatal_error(ATOM_FLT_REQ_EXT_MSG("16_add"), false);
- Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float16_add);
- Reqs.addCapability(SPIRV::Capability::AtomicFloat16AddEXT);
+ if (isBFloat16Type(TypeDef)) {
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics))
+ report_fatal_error(
+ "The atomic bfloat16 instruction requires the following SPIR-V "
+ "extension: SPV_INTEL_16bit_atomics",
+ false);
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics);
+ Reqs.addCapability(SPIRV::Capability::AtomicBFloat16AddINTEL);
+ } else {
+ if (!ST.canUseExtension(
+ SPIRV::Extension::SPV_EXT_shader_atomic_float16_add))
+ report_fatal_error(ATOM_FLT_REQ_EXT_MSG("16_add"), false);
+ Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float16_add);
+ Reqs.addCapability(SPIRV::Capability::AtomicFloat16AddEXT);
+ }
break;
case 32:
Reqs.addCapability(SPIRV::Capability::AtomicFloat32AddEXT);
@@ -1104,7 +1121,17 @@ static void AddAtomicFloatRequirements(const MachineInstr &MI,
Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float_min_max);
switch (BitWidth) {
case 16:
- Reqs.addCapability(SPIRV::Capability::AtomicFloat16MinMaxEXT);
+ if (isBFloat16Type(TypeDef)) {
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics))
+ report_fatal_error(
+ "The atomic bfloat16 instruction requires the following SPIR-V "
+ "extension: SPV_INTEL_16bit_atomics",
+ false);
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics);
+ Reqs.addCapability(SPIRV::Capability::AtomicBFloat16MinMaxINTEL);
+ } else {
+ Reqs.addCapability(SPIRV::Capability::AtomicFloat16MinMaxEXT);
+ }
break;
case 32:
Reqs.addCapability(SPIRV::Capability::AtomicFloat32MinMaxEXT);
@@ -1328,13 +1355,6 @@ void addPrintfRequirements(const MachineInstr &MI,
}
}
-static bool isBFloat16Type(const SPIRVType *TypeDef) {
- return TypeDef && TypeDef->getNumOperands() == 3 &&
- TypeDef->getOpcode() == SPIRV::OpTypeFloat &&
- TypeDef->getOperand(1).getImm() == 16 &&
- TypeDef->getOperand(2).getImm() == SPIRV::FPEncoding::BFloat16KHR;
-}
-
void addInstrRequirements(const MachineInstr &MI,
SPIRV::ModuleAnalysisInfo &MAI,
const SPIRVSubtarget &ST) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 65a8885..f02a587 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -389,6 +389,7 @@ defm SPV_INTEL_predicated_io : ExtensionOperand<127, [EnvOpenCL]>;
defm SPV_KHR_maximal_reconvergence : ExtensionOperand<128, [EnvVulkan]>;
defm SPV_INTEL_bfloat16_arithmetic
: ExtensionOperand<129, [EnvVulkan, EnvOpenCL]>;
+defm SPV_INTEL_16bit_atomics : ExtensionOperand<130, [EnvVulkan, EnvOpenCL]>;
//===----------------------------------------------------------------------===//
// Multiclass used to define Capabilities enum values and at the same time
@@ -566,9 +567,11 @@ defm FloatControls2
defm AtomicFloat32AddEXT : CapabilityOperand<6033, 0, 0, [SPV_EXT_shader_atomic_float_add], []>;
defm AtomicFloat64AddEXT : CapabilityOperand<6034, 0, 0, [SPV_EXT_shader_atomic_float_add], []>;
defm AtomicFloat16AddEXT : CapabilityOperand<6095, 0, 0, [SPV_EXT_shader_atomic_float16_add], []>;
+defm AtomicBFloat16AddINTEL : CapabilityOperand<6255, 0, 0, [SPV_INTEL_16bit_atomics], []>;
defm AtomicFloat16MinMaxEXT : CapabilityOperand<5616, 0, 0, [SPV_EXT_shader_atomic_float_min_max], []>;
defm AtomicFloat32MinMaxEXT : CapabilityOperand<5612, 0, 0, [SPV_EXT_shader_atomic_float_min_max], []>;
defm AtomicFloat64MinMaxEXT : CapabilityOperand<5613, 0, 0, [SPV_EXT_shader_atomic_float_min_max], []>;
+defm AtomicBFloat16MinMaxINTEL : CapabilityOperand<6256, 0, 0, [SPV_INTEL_16bit_atomics], []>;
defm VariableLengthArrayINTEL : CapabilityOperand<5817, 0, 0, [SPV_INTEL_variable_length_array], []>;
defm GroupUniformArithmeticKHR : CapabilityOperand<6400, 0, 0, [SPV_KHR_uniform_group_instructions], []>;
defm USMStorageClassesINTEL : CapabilityOperand<5935, 0, 0, [SPV_INTEL_usm_storage_classes], [Kernel]>;
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index f08a0c0..94ae64c 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -14,7 +14,6 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
-#include <array>
#include <atomic>
#include <optional>
#include <string>
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index bd74388..8e76b79 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -83,7 +83,6 @@
#include <cstdint>
#include <functional>
#include <limits>
-#include <map>
#include <memory>
#include <queue>
#include <string>
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 27ec6c6..5bc9c28 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -5627,8 +5627,15 @@ bool InstCombinerImpl::run() {
for (Use &U : I->uses()) {
User *User = U.getUser();
- if (User->isDroppable())
- continue;
+ if (User->isDroppable()) {
+ // Do not sink if there are dereferenceable assumes that would be
+ // removed.
+ auto II = dyn_cast<IntrinsicInst>(User);
+ if (II->getIntrinsicID() != Intrinsic::assume ||
+ !II->getOperandBundle("dereferenceable"))
+ continue;
+ }
+
if (NumUsers > MaxSinkNumUsers)
return std::nullopt;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 666033b..45b5570 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8157,9 +8157,10 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
return new VPWidenSelectRecipe(*cast<SelectInst>(Instr), R->operands());
if (Instruction::isCast(VPI->getOpcode())) {
+ auto *CastR = cast<VPInstructionWithType>(R);
auto *CI = cast<CastInst>(Instr);
return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0),
- CI->getType(), *CI);
+ CastR->getResultType(), *CI);
}
return tryToWiden(VPI);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f405c40..663e31a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/MDBuilder.h"
#define DEBUG_TYPE "vplan"
@@ -233,10 +234,15 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
for (Value *Op : Inst->operands())
VPOperands.push_back(getOrCreateVPOperand(Op));
- // Build VPInstruction for any arbitrary Instruction without specific
- // representation in VPlan.
- NewR = cast<VPInstruction>(
- VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
+ if (auto *CI = dyn_cast<CastInst>(Inst)) {
+ NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
+ CI->getType(), CI->getDebugLoc());
+ NewR->setUnderlyingValue(CI);
+ } else {
+ // Build VPInstruction for any arbitrary Instruction without specific
+ // representation in VPlan.
+ NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst);
+ }
}
IRDef2VPValue[Inst] = NewR;
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index 5be9394..4f65786 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -76,6 +76,15 @@ define <2 x i32> @movi2s_lsl16() {
ret <2 x i32> <i32 16711680, i32 16711680>
}
+define <2 x i32> @movi2s_fneg() {
+; CHECK-LABEL: movi2s_fneg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2s, #240, lsl #8
+; CHECK-NEXT: fneg v0.2s, v0.2s
+; CHECK-NEXT: ret
+ ret <2 x i32> <i32 2147545088, i32 2147545088>
+}
+
define <2 x i32> @movi2s_lsl24() {
; CHECK-LABEL: movi2s_lsl24:
; CHECK: // %bb.0:
@@ -149,6 +158,33 @@ define <4 x i16> @movi4h_lsl8() {
ret <4 x i16> <i16 65280, i16 65280, i16 65280, i16 65280>
}
+define <4 x i16> @movi4h_fneg() {
+; CHECK-NOFP16-SD-LABEL: movi4h_fneg:
+; CHECK-NOFP16-SD: // %bb.0:
+; CHECK-NOFP16-SD-NEXT: movi v0.4h, #127, lsl #8
+; CHECK-NOFP16-SD-NEXT: fneg v0.2s, v0.2s
+; CHECK-NOFP16-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: movi4h_fneg:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: movi v0.4h, #127, lsl #8
+; CHECK-FP16-SD-NEXT: fneg v0.2s, v0.2s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-NOFP16-GI-LABEL: movi4h_fneg:
+; CHECK-NOFP16-GI: // %bb.0:
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI18_0
+; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0]
+; CHECK-NOFP16-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: movi4h_fneg:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI18_0
+; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0]
+; CHECK-FP16-GI-NEXT: ret
+ ret <4 x i16> <i16 32512, i16 65280, i16 32512, i16 65280>
+}
+
define <8 x i16> @movi8h_lsl0() {
; CHECK-LABEL: movi8h_lsl0:
; CHECK: // %bb.0:
@@ -180,14 +216,14 @@ define <8 x i16> @movi8h_fneg() {
;
; CHECK-NOFP16-GI-LABEL: movi8h_fneg:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI19_0
-; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI21_0
+; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0]
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: movi8h_fneg:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI19_0
-; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI21_0
+; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0]
; CHECK-FP16-GI-NEXT: ret
ret <8 x i16> <i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280>
}
@@ -275,6 +311,27 @@ define <4 x i16> @mvni4h_lsl8() {
ret <4 x i16> <i16 61439, i16 61439, i16 61439, i16 61439>
}
+define <4 x i16> @mvni4h_neg() {
+; CHECK-NOFP16-SD-LABEL: mvni4h_neg:
+; CHECK-NOFP16-SD: // %bb.0:
+; CHECK-NOFP16-SD-NEXT: mov w8, #33008 // =0x80f0
+; CHECK-NOFP16-SD-NEXT: dup v0.4h, w8
+; CHECK-NOFP16-SD-NEXT: ret
+;
+; CHECK-FP16-LABEL: mvni4h_neg:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: movi v0.4h, #240
+; CHECK-FP16-NEXT: fneg v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+;
+; CHECK-NOFP16-GI-LABEL: mvni4h_neg:
+; CHECK-NOFP16-GI: // %bb.0:
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
+; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI32_0]
+; CHECK-NOFP16-GI-NEXT: ret
+ ret <4 x i16> <i16 33008, i16 33008, i16 33008, i16 33008>
+}
+
define <8 x i16> @mvni8h_lsl0() {
; CHECK-LABEL: mvni8h_lsl0:
; CHECK: // %bb.0:
@@ -306,8 +363,8 @@ define <8 x i16> @mvni8h_neg() {
;
; CHECK-NOFP16-GI-LABEL: mvni8h_neg:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
-; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI35_0
+; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI35_0]
; CHECK-NOFP16-GI-NEXT: ret
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
}
@@ -486,6 +543,33 @@ define <2 x double> @fmov2d_neg0() {
ret <2 x double> <double -0.0, double -0.0>
}
+define <1 x double> @fmov1d_neg0() {
+; CHECK-NOFP16-SD-LABEL: fmov1d_neg0:
+; CHECK-NOFP16-SD: // %bb.0:
+; CHECK-NOFP16-SD-NEXT: movi d0, #0000000000000000
+; CHECK-NOFP16-SD-NEXT: fneg d0, d0
+; CHECK-NOFP16-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: fmov1d_neg0:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: movi d0, #0000000000000000
+; CHECK-FP16-SD-NEXT: fneg d0, d0
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-NOFP16-GI-LABEL: fmov1d_neg0:
+; CHECK-NOFP16-GI: // %bb.0:
+; CHECK-NOFP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NOFP16-GI-NEXT: fmov d0, x8
+; CHECK-NOFP16-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: fmov1d_neg0:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-FP16-GI-NEXT: fmov d0, x8
+; CHECK-FP16-GI-NEXT: ret
+ ret <1 x double> <double -0.0>
+}
+
define <2 x i32> @movi1d_1() {
; CHECK-NOFP16-SD-LABEL: movi1d_1:
; CHECK-NOFP16-SD: // %bb.0:
@@ -499,14 +583,14 @@ define <2 x i32> @movi1d_1() {
;
; CHECK-NOFP16-GI-LABEL: movi1d_1:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI52_0
-; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0]
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI56_0
+; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0]
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: movi1d_1:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI52_0
-; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0]
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI56_0
+; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0]
; CHECK-FP16-GI-NEXT: ret
ret <2 x i32> <i32 -65536, i32 65535>
}
@@ -517,31 +601,31 @@ define <2 x i32> @movi1d() {
; CHECK-NOFP16-SD-LABEL: movi1d:
; CHECK-NOFP16-SD: // %bb.0:
; CHECK-NOFP16-SD-NEXT: movi d1, #0x00ffffffff0000
-; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI53_0
-; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0]
+; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI57_0
+; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
; CHECK-NOFP16-SD-NEXT: b test_movi1d
;
; CHECK-FP16-SD-LABEL: movi1d:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: movi d1, #0x00ffffffff0000
-; CHECK-FP16-SD-NEXT: adrp x8, .LCPI53_0
-; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0]
+; CHECK-FP16-SD-NEXT: adrp x8, .LCPI57_0
+; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
; CHECK-FP16-SD-NEXT: b test_movi1d
;
; CHECK-NOFP16-GI-LABEL: movi1d:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI53_1
-; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI53_0
-; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1]
-; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0]
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI57_1
+; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI57_0
+; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1]
+; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0]
; CHECK-NOFP16-GI-NEXT: b test_movi1d
;
; CHECK-FP16-GI-LABEL: movi1d:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI53_1
-; CHECK-FP16-GI-NEXT: adrp x9, .LCPI53_0
-; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1]
-; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0]
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI57_1
+; CHECK-FP16-GI-NEXT: adrp x9, .LCPI57_0
+; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1]
+; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0]
; CHECK-FP16-GI-NEXT: b test_movi1d
%1 = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>)
ret <2 x i32> %1
diff --git a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
new file mode 100644
index 0000000..b5c3399
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; Test that the 'and' mask is sunk into the cmp's use block only if it masks a single bit
+; RUN: llc -march=hexagon --verify-machineinstrs < %s | FileCheck %s
+
+@A = global i32 zeroinitializer
+
+define i32 @and_sink1(i32 %a) {
+; CHECK-LABEL: and_sink1:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = !tstbit(r0,#11)
+; CHECK-NEXT: r0 = ##A
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: // %bb0
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) jump:nt .LBB0_1
+; CHECK-NEXT: memw(r0+#0) = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %bb2
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %and = and i32 %a, 2048
+ br label %bb0
+bb0:
+ %cmp = icmp eq i32 %and, 0
+ store i32 0, i32* @A
+ br i1 %cmp, label %bb0, label %bb2
+bb2:
+ ret i32 0
+}
+
+define i32 @and_sink2(i32 %a) {
+; CHECK-LABEL: and_sink2:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = and(r0,##2049)
+; CHECK-NEXT: r0 = ##A
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r1,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB1_1: // %bb0
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) jump:nt .LBB1_1
+; CHECK-NEXT: memw(r0+#0) = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %bb2
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %and = and i32 %a, 2049
+ br label %bb0
+bb0:
+ %cmp = icmp eq i32 %and, 0
+ store i32 0, i32* @A
+ br i1 %cmp, label %bb0, label %bb2
+bb2:
+ ret i32 0
+}
diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
new file mode 100644
index 0000000..36670fa
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
@@ -0,0 +1,2239 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL
+
+declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly
+declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly
+
+define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_0:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: move $a2, $zero
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_0:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $a2, $zero
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_1:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_1:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_1:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 1
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_1:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 1
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_2:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_2:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_2:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 2
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_2:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 2
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_3:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a2, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a3, $a1, 0
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 2
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 2
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_3:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a2, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a3, $a1, 0
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 2
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 2
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_3:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 3
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_3:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 3
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_4:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_4:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_4:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_4:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_5:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_5:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_5:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 5
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_5:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 5
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_6:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_6:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_6:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 6
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_6:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 6
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_7:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a0, $a0, 3
+; LA32-UAL-NEXT: ld.w $a1, $a1, 3
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_7:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.w $a0, $a0, 3
+; LA64-UAL-NEXT: ld.w $a1, $a1, 3
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_7:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 7
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_7:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 7
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_8:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a0, $a0, 4
+; LA32-UAL-NEXT: ld.w $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_8:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a0, $a0, 0
+; LA64-UAL-NEXT: ld.d $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_8:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 8
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_8:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 8
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_15:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 11
+; LA32-UAL-NEXT: ld.w $a1, $a1, 11
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_15:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 7
+; LA64-UAL-NEXT: ld.d $a1, $a1, 7
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_15:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 15
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_15:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 15
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_16:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_16:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_16:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_16:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_31:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 31
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_31:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $a0, $a0, 23
+; LA64-UAL-NEXT: ld.d $a1, $a1, 23
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a0, $a4, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_31:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 31
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 32
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_32:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $a0, $a0, 24
+; LA64-UAL-NEXT: ld.d $a1, $a1, 24
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a0, $a4, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_32:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 32
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_63:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 63
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_63:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 63
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 64
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 64
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_127:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 127
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_127:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 127
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_128:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 128
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_128:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 128
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize {
+; LA32-LABEL: bcmp_size_runtime:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_runtime:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len)
+ ret i32 %bcmp
+}
+
+define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_eq_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltui $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_eq_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltui $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_eq_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: sltui $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_eq_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: sltui $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp eq i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_lt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_lt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_lt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: srli.w $a0, $a0, 31
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_lt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_gt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_gt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_gt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: slt $a0, $zero, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_gt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slt $a0, $zero, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-LABEL: memcmp_size_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: move $a0, $zero
+; CHECK-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_1:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_1:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_1:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 1
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_1:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 1
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_2:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 8
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 8
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 4095
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: and $a1, $a1, $a2
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_2:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.h $a0, $a0, 0
+; LA64-UAL-NEXT: ld.h $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2h $a0, $a0
+; LA64-UAL-NEXT: revb.2h $a1, $a1
+; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_2:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 2
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_2:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 2
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_3:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a2, $a0, 2
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a3, $a1, 2
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: lu12i.w $a4, 15
+; LA32-UAL-NEXT: ori $a4, $a4, 3840
+; LA32-UAL-NEXT: and $a5, $a0, $a4
+; LA32-UAL-NEXT: or $a2, $a5, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: and $a2, $a1, $a4
+; LA32-UAL-NEXT: or $a2, $a2, $a3
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a2, $a1
+; LA32-UAL-NEXT: sltu $a2, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a2
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_3:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a2, $a0, 2
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a3, $a1, 2
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 16
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 16
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_3:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 3
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_3:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 3
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_4:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a2, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a2
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_4:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_4:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_4:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_5:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: srli.w $a4, $a2, 8
+; LA32-UAL-NEXT: lu12i.w $a5, 15
+; LA32-UAL-NEXT: ori $a5, $a5, 3840
+; LA32-UAL-NEXT: and $a4, $a4, $a5
+; LA32-UAL-NEXT: srli.w $a6, $a2, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: and $a6, $a2, $a5
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a2, $a2, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a6
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: srli.w $a4, $a3, 8
+; LA32-UAL-NEXT: and $a4, $a4, $a5
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: and $a5, $a3, $a5
+; LA32-UAL-NEXT: slli.w $a5, $a5, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: bne $a2, $a3, .LBB26_2
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB26_2: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a2, $a3
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_5:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a2, $a0, 4
+; LA64-UAL-NEXT: ld.wu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a3, $a1, 4
+; LA64-UAL-NEXT: ld.wu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 32
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 32
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_5:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 5
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_5:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 5
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_6:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a6, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a6
+; LA32-UAL-NEXT: srli.w $a7, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a7
+; LA32-UAL-NEXT: and $a7, $a3, $a6
+; LA32-UAL-NEXT: slli.w $a7, $a7, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a7
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a6
+; LA32-UAL-NEXT: srli.w $a7, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a7
+; LA32-UAL-NEXT: and $a6, $a4, $a6
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 8
+; LA32-UAL-NEXT: or $a0, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a3, $a1, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 8
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: ori $a2, $a2, 4095
+; LA32-UAL-NEXT: and $a3, $a0, $a2
+; LA32-UAL-NEXT: and $a4, $a1, $a2
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB27_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_6:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a2, $a0, 4
+; LA64-UAL-NEXT: ld.wu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a3, $a1, 4
+; LA64-UAL-NEXT: ld.wu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 32
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 32
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_6:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 6
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_6:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 6
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_7:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a0, $a0, 3
+; LA32-UAL-NEXT: ld.w $a1, $a1, 3
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB28_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_7:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a2, $a2
+; LA64-UAL-NEXT: addi.w $a4, $a2, 0
+; LA64-UAL-NEXT: revb.2w $a3, $a3
+; LA64-UAL-NEXT: addi.w $a5, $a3, 0
+; LA64-UAL-NEXT: bne $a4, $a5, .LBB28_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.w $a0, $a0, 3
+; LA64-UAL-NEXT: ld.w $a1, $a1, 3
+; LA64-UAL-NEXT: revb.2w $a2, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a2, 0
+; LA64-UAL-NEXT: revb.2w $a3, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a3, 0
+; LA64-UAL-NEXT: bne $a0, $a1, .LBB28_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB28_3: # %res_block
+; LA64-UAL-NEXT: addi.w $a0, $a3, 0
+; LA64-UAL-NEXT: addi.w $a1, $a2, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_7:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 7
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_7:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 7
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_8:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a0, $a0, 4
+; LA32-UAL-NEXT: ld.w $a1, $a1, 4
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB29_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_8:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a0, $a0, 0
+; LA64-UAL-NEXT: ld.d $a1, $a1, 0
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_8:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 8
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_8:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 8
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_15:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a0, $a0, 11
+; LA32-UAL-NEXT: ld.w $a1, $a1, 11
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5
+; LA32-UAL-NEXT: # %bb.4:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB30_5: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_15:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a0, $a0, 7
+; LA64-UAL-NEXT: ld.d $a1, $a1, 7
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB30_3: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_15:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 15
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_15:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 15
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_16:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5
+; LA32-UAL-NEXT: # %bb.4:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB31_5: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_16:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB31_3: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_16:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_16:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_31:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 31
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_31:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a0, $a0, 23
+; LA64-UAL-NEXT: ld.d $a1, $a1, 23
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5
+; LA64-UAL-NEXT: # %bb.4:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB32_5: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_31:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 31
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 32
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_32:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a0, $a0, 24
+; LA64-UAL-NEXT: ld.d $a1, $a1, 24
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5
+; LA64-UAL-NEXT: # %bb.4:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB33_5: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_32:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 32
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_63:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 63
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_63:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 63
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 64
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 64
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_127:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 127
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_127:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 127
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_128:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 128
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_128:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 128
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize {
+; LA32-LABEL: memcmp_size_runtime:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_runtime:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len)
+ ret i32 %memcmp
+}
+
+define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_eq_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltui $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_eq_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltui $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_eq_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: ld.bu $a2, $a1, 1
+; LA32-NUAL-NEXT: ld.bu $a3, $a1, 0
+; LA32-NUAL-NEXT: ld.bu $a4, $a1, 2
+; LA32-NUAL-NEXT: ld.bu $a1, $a1, 3
+; LA32-NUAL-NEXT: slli.w $a2, $a2, 8
+; LA32-NUAL-NEXT: or $a2, $a2, $a3
+; LA32-NUAL-NEXT: slli.w $a3, $a4, 16
+; LA32-NUAL-NEXT: slli.w $a1, $a1, 24
+; LA32-NUAL-NEXT: or $a1, $a1, $a3
+; LA32-NUAL-NEXT: or $a1, $a1, $a2
+; LA32-NUAL-NEXT: ld.bu $a2, $a0, 1
+; LA32-NUAL-NEXT: ld.bu $a3, $a0, 0
+; LA32-NUAL-NEXT: ld.bu $a4, $a0, 2
+; LA32-NUAL-NEXT: ld.bu $a0, $a0, 3
+; LA32-NUAL-NEXT: slli.w $a2, $a2, 8
+; LA32-NUAL-NEXT: or $a2, $a2, $a3
+; LA32-NUAL-NEXT: slli.w $a3, $a4, 16
+; LA32-NUAL-NEXT: slli.w $a0, $a0, 24
+; LA32-NUAL-NEXT: or $a0, $a0, $a3
+; LA32-NUAL-NEXT: or $a0, $a0, $a2
+; LA32-NUAL-NEXT: xor $a0, $a0, $a1
+; LA32-NUAL-NEXT: sltui $a0, $a0, 1
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_eq_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: ld.bu $a2, $a1, 1
+; LA64-NUAL-NEXT: ld.bu $a3, $a1, 0
+; LA64-NUAL-NEXT: ld.bu $a4, $a1, 2
+; LA64-NUAL-NEXT: ld.b $a1, $a1, 3
+; LA64-NUAL-NEXT: slli.d $a2, $a2, 8
+; LA64-NUAL-NEXT: or $a2, $a2, $a3
+; LA64-NUAL-NEXT: slli.d $a3, $a4, 16
+; LA64-NUAL-NEXT: slli.d $a1, $a1, 24
+; LA64-NUAL-NEXT: or $a1, $a1, $a3
+; LA64-NUAL-NEXT: or $a1, $a1, $a2
+; LA64-NUAL-NEXT: ld.bu $a2, $a0, 1
+; LA64-NUAL-NEXT: ld.bu $a3, $a0, 0
+; LA64-NUAL-NEXT: ld.bu $a4, $a0, 2
+; LA64-NUAL-NEXT: ld.b $a0, $a0, 3
+; LA64-NUAL-NEXT: slli.d $a2, $a2, 8
+; LA64-NUAL-NEXT: or $a2, $a2, $a3
+; LA64-NUAL-NEXT: slli.d $a3, $a4, 16
+; LA64-NUAL-NEXT: slli.d $a0, $a0, 24
+; LA64-NUAL-NEXT: or $a0, $a0, $a3
+; LA64-NUAL-NEXT: or $a0, $a0, $a2
+; LA64-NUAL-NEXT: xor $a0, $a0, $a1
+; LA64-NUAL-NEXT: sltui $a0, $a0, 1
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp eq i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_lt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_lt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_lt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: srli.w $a0, $a0, 31
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_lt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_gt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_gt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_gt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: slt $a0, $zero, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_gt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slt $a0, $zero, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %memcmp, 0
+ ret i1 %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll
new file mode 100644
index 0000000..c1bf850
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll
@@ -0,0 +1,3106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL
+
+declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly
+declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly
+
+define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_0:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: move $a2, $zero
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_0:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $a2, $zero
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_1:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_1:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_1:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 1
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_1:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 1
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_2:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_2:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_2:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 2
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_2:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 2
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_3:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a2, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a3, $a1, 0
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 2
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 2
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_3:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a2, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a3, $a1, 0
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 2
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 2
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_3:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 3
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_3:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 3
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_4:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_4:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_4:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_4:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_5:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_5:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_5:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 5
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_5:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 5
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_6:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_6:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_6:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 6
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_6:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 6
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_7:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a0, $a0, 3
+; LA32-UAL-NEXT: ld.w $a1, $a1, 3
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_7:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.w $a0, $a0, 3
+; LA64-UAL-NEXT: ld.w $a1, $a1, 3
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_7:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 7
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_7:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 7
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_8:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a0, $a0, 4
+; LA32-UAL-NEXT: ld.w $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_8:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a0, $a0, 0
+; LA64-UAL-NEXT: ld.d $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_8:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 8
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_8:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 8
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_15:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 11
+; LA32-UAL-NEXT: ld.w $a1, $a1, 11
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_15:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 7
+; LA64-UAL-NEXT: ld.d $a1, $a1, 7
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_15:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 15
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_15:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 15
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_16:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_16:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_16:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_16:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_31:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $t0, $a0, 12
+; LA32-UAL-NEXT: ld.w $t1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a5, $t0, $t1
+; LA32-UAL-NEXT: ld.w $a6, $a0, 16
+; LA32-UAL-NEXT: ld.w $a7, $a1, 16
+; LA32-UAL-NEXT: ld.w $t0, $a0, 20
+; LA32-UAL-NEXT: ld.w $t1, $a1, 20
+; LA32-UAL-NEXT: ld.w $t2, $a0, 24
+; LA32-UAL-NEXT: ld.w $t3, $a1, 24
+; LA32-UAL-NEXT: ld.w $a0, $a0, 27
+; LA32-UAL-NEXT: ld.w $a1, $a1, 27
+; LA32-UAL-NEXT: xor $a6, $a6, $a7
+; LA32-UAL-NEXT: xor $a7, $t0, $t1
+; LA32-UAL-NEXT: xor $t0, $t2, $t3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a2, $a4, $a5
+; LA32-UAL-NEXT: or $a3, $a6, $a7
+; LA32-UAL-NEXT: or $a0, $t0, $a0
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a0, $a3, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_31:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $a0, $a0, 23
+; LA64-UAL-NEXT: ld.d $a1, $a1, 23
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a0, $a4, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_31:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 31
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_31:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 31
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_32:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $t0, $a0, 12
+; LA32-UAL-NEXT: ld.w $t1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a5, $t0, $t1
+; LA32-UAL-NEXT: ld.w $a6, $a0, 16
+; LA32-UAL-NEXT: ld.w $a7, $a1, 16
+; LA32-UAL-NEXT: ld.w $t0, $a0, 20
+; LA32-UAL-NEXT: ld.w $t1, $a1, 20
+; LA32-UAL-NEXT: ld.w $t2, $a0, 24
+; LA32-UAL-NEXT: ld.w $t3, $a1, 24
+; LA32-UAL-NEXT: ld.w $a0, $a0, 28
+; LA32-UAL-NEXT: ld.w $a1, $a1, 28
+; LA32-UAL-NEXT: xor $a6, $a6, $a7
+; LA32-UAL-NEXT: xor $a7, $t0, $t1
+; LA32-UAL-NEXT: xor $t0, $t2, $t3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a2, $a4, $a5
+; LA32-UAL-NEXT: or $a3, $a6, $a7
+; LA32-UAL-NEXT: or $a0, $t0, $a0
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a0, $a3, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_32:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $a0, $a0, 24
+; LA64-UAL-NEXT: ld.d $a1, $a1, 24
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a0, $a4, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_32:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 32
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_32:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 32
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_63:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 63
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_63:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $t0, $a0, 24
+; LA64-UAL-NEXT: ld.d $t1, $a1, 24
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a5, $t0, $t1
+; LA64-UAL-NEXT: ld.d $a6, $a0, 32
+; LA64-UAL-NEXT: ld.d $a7, $a1, 32
+; LA64-UAL-NEXT: ld.d $t0, $a0, 40
+; LA64-UAL-NEXT: ld.d $t1, $a1, 40
+; LA64-UAL-NEXT: ld.d $t2, $a0, 48
+; LA64-UAL-NEXT: ld.d $t3, $a1, 48
+; LA64-UAL-NEXT: ld.d $a0, $a0, 55
+; LA64-UAL-NEXT: ld.d $a1, $a1, 55
+; LA64-UAL-NEXT: xor $a6, $a6, $a7
+; LA64-UAL-NEXT: xor $a7, $t0, $t1
+; LA64-UAL-NEXT: xor $t0, $t2, $t3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a2, $a4, $a5
+; LA64-UAL-NEXT: or $a3, $a6, $a7
+; LA64-UAL-NEXT: or $a0, $t0, $a0
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: or $a0, $a3, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_63:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 63
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 64
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_64:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $t0, $a0, 24
+; LA64-UAL-NEXT: ld.d $t1, $a1, 24
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a5, $t0, $t1
+; LA64-UAL-NEXT: ld.d $a6, $a0, 32
+; LA64-UAL-NEXT: ld.d $a7, $a1, 32
+; LA64-UAL-NEXT: ld.d $t0, $a0, 40
+; LA64-UAL-NEXT: ld.d $t1, $a1, 40
+; LA64-UAL-NEXT: ld.d $t2, $a0, 48
+; LA64-UAL-NEXT: ld.d $t3, $a1, 48
+; LA64-UAL-NEXT: ld.d $a0, $a0, 56
+; LA64-UAL-NEXT: ld.d $a1, $a1, 56
+; LA64-UAL-NEXT: xor $a6, $a6, $a7
+; LA64-UAL-NEXT: xor $a7, $t0, $t1
+; LA64-UAL-NEXT: xor $t0, $t2, $t3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a2, $a4, $a5
+; LA64-UAL-NEXT: or $a3, $a6, $a7
+; LA64-UAL-NEXT: or $a0, $t0, $a0
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: or $a0, $a3, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_64:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 64
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_127:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 127
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_127:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 127
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_128:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 128
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_128:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 128
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind {
+; LA32-LABEL: bcmp_size_runtime:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_runtime:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len)
+ ret i32 %bcmp
+}
+
+define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_eq_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltui $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_eq_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltui $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_eq_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: sltui $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_eq_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: sltui $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16)
+ %ret = icmp eq i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_lt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_lt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_lt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: srli.w $a0, $a0, 31
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_lt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_gt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_gt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_gt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: slt $a0, $zero, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_gt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slt $a0, $zero, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_le_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_le_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: slti $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_le_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: slti $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_le_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: slti $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_le_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %bcmp, 1
+ ret i1 %ret
+}
+
+define i1 @bcmp_ge_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_ge_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ori $a0, $zero, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_ge_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ori $a0, $zero, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_ge_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: addi.w $a1, $zero, -1
+; LA32-NUAL-NEXT: slt $a0, $a1, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_ge_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: addi.w $a1, $zero, -1
+; LA64-NUAL-NEXT: slt $a0, $a1, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %bcmp, -1
+ ret i1 %ret
+}
+
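+; memcmp expansion: a zero size folds to a constant 0 result on all
+; configurations.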
+define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind {
+; CHECK-LABEL: memcmp_size_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: move $a0, $zero
+; CHECK-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_1:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_1:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_1:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 1
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_1:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 1
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_2:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 8
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 8
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 4095
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: and $a1, $a1, $a2
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_2:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.h $a0, $a0, 0
+; LA64-UAL-NEXT: ld.h $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2h $a0, $a0
+; LA64-UAL-NEXT: revb.2h $a1, $a1
+; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_2:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 2
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_2:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 2
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_3:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a2, $a0, 2
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a3, $a1, 2
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: lu12i.w $a4, 15
+; LA32-UAL-NEXT: ori $a4, $a4, 3840
+; LA32-UAL-NEXT: and $a5, $a0, $a4
+; LA32-UAL-NEXT: or $a2, $a5, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: and $a2, $a1, $a4
+; LA32-UAL-NEXT: or $a2, $a2, $a3
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a2, $a1
+; LA32-UAL-NEXT: sltu $a2, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a2
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_3:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a2, $a0, 2
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a3, $a1, 2
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 16
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 16
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_3:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 3
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_3:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 3
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3)
+ ret i32 %memcmp
+}
+
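+; A 4-byte memcmp becomes a single word load per operand, byte-reversed and
+; compared as unsigned integers when unaligned access is allowed.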
+define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_4:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a2, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a2
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_4:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_4:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_4:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ ret i32 %memcmp
+}
+
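+; For sizes 5, 6 and 8, LA64 merges the loads into a single 64-bit
+; byte-reversed compare; size 7 and all LA32 cases above 4 bytes use a
+; multi-block compare with a shared res_block.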
+define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_5:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: srli.w $a4, $a2, 8
+; LA32-UAL-NEXT: lu12i.w $a5, 15
+; LA32-UAL-NEXT: ori $a5, $a5, 3840
+; LA32-UAL-NEXT: and $a4, $a4, $a5
+; LA32-UAL-NEXT: srli.w $a6, $a2, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: and $a6, $a2, $a5
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a2, $a2, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a6
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: srli.w $a4, $a3, 8
+; LA32-UAL-NEXT: and $a4, $a4, $a5
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: and $a5, $a3, $a5
+; LA32-UAL-NEXT: slli.w $a5, $a5, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: bne $a2, $a3, .LBB28_2
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB28_2: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a2, $a3
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_5:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a2, $a0, 4
+; LA64-UAL-NEXT: ld.wu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a3, $a1, 4
+; LA64-UAL-NEXT: ld.wu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 32
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 32
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_5:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 5
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_5:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 5
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_6:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a6, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a6
+; LA32-UAL-NEXT: srli.w $a7, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a7
+; LA32-UAL-NEXT: and $a7, $a3, $a6
+; LA32-UAL-NEXT: slli.w $a7, $a7, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a7
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a6
+; LA32-UAL-NEXT: srli.w $a7, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a7
+; LA32-UAL-NEXT: and $a6, $a4, $a6
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 8
+; LA32-UAL-NEXT: or $a0, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a3, $a1, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 8
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: ori $a2, $a2, 4095
+; LA32-UAL-NEXT: and $a3, $a0, $a2
+; LA32-UAL-NEXT: and $a4, $a1, $a2
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB29_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_6:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a2, $a0, 4
+; LA64-UAL-NEXT: ld.wu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a3, $a1, 4
+; LA64-UAL-NEXT: ld.wu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 32
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 32
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_6:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 6
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_6:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 6
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_7:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a0, $a0, 3
+; LA32-UAL-NEXT: ld.w $a1, $a1, 3
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB30_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_7:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a2, $a2
+; LA64-UAL-NEXT: addi.w $a4, $a2, 0
+; LA64-UAL-NEXT: revb.2w $a3, $a3
+; LA64-UAL-NEXT: addi.w $a5, $a3, 0
+; LA64-UAL-NEXT: bne $a4, $a5, .LBB30_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.w $a0, $a0, 3
+; LA64-UAL-NEXT: ld.w $a1, $a1, 3
+; LA64-UAL-NEXT: revb.2w $a2, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a2, 0
+; LA64-UAL-NEXT: revb.2w $a3, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a3, 0
+; LA64-UAL-NEXT: bne $a0, $a1, .LBB30_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB30_3: # %res_block
+; LA64-UAL-NEXT: addi.w $a0, $a3, 0
+; LA64-UAL-NEXT: addi.w $a1, $a2, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_7:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 7
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_7:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 7
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_8:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a0, $a0, 4
+; LA32-UAL-NEXT: ld.w $a1, $a1, 4
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB31_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_8:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a0, $a0, 0
+; LA64-UAL-NEXT: ld.d $a1, $a1, 0
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_8:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 8
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_8:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 8
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8)
+ ret i32 %memcmp
+}
+
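+; Non-power-of-two sizes are covered with overlapping loads: the final block
+; re-reads bytes that were already compared (offsets 7 and 11 here, 23 and 27
+; for the 31-byte case).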
+define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_15:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a0, $a0, 11
+; LA32-UAL-NEXT: ld.w $a1, $a1, 11
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5
+; LA32-UAL-NEXT: # %bb.4:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB32_5: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_15:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a0, $a0, 7
+; LA64-UAL-NEXT: ld.d $a1, $a1, 7
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB32_3: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_15:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 15
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_15:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 15
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_16:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5
+; LA32-UAL-NEXT: # %bb.4:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB33_5: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_16:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB33_3: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_16:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_16:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_31:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a3, $a0, 12
+; LA32-UAL-NEXT: ld.w $a4, $a1, 12
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.4: # %loadbb4
+; LA32-UAL-NEXT: ld.w $a3, $a0, 16
+; LA32-UAL-NEXT: ld.w $a4, $a1, 16
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.5: # %loadbb5
+; LA32-UAL-NEXT: ld.w $a3, $a0, 20
+; LA32-UAL-NEXT: ld.w $a4, $a1, 20
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.6: # %loadbb6
+; LA32-UAL-NEXT: ld.w $a3, $a0, 24
+; LA32-UAL-NEXT: ld.w $a4, $a1, 24
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.7: # %loadbb7
+; LA32-UAL-NEXT: ld.w $a0, $a0, 27
+; LA32-UAL-NEXT: ld.w $a1, $a1, 27
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.8:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB34_9: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_31:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a0, $a0, 23
+; LA64-UAL-NEXT: ld.d $a1, $a1, 23
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5
+; LA64-UAL-NEXT: # %bb.4:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB34_5: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_31:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 31
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_31:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 31
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31)
+ ret i32 %memcmp
+}
+
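+; Beyond 32 bytes LA32 always calls the library (a single LA32 prefix covers
+; both alignment modes), while LA64 with unaligned access still expands up to
+; eight 8-byte blocks.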
+define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_32:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a3, $a0, 12
+; LA32-UAL-NEXT: ld.w $a4, $a1, 12
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.4: # %loadbb4
+; LA32-UAL-NEXT: ld.w $a3, $a0, 16
+; LA32-UAL-NEXT: ld.w $a4, $a1, 16
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.5: # %loadbb5
+; LA32-UAL-NEXT: ld.w $a3, $a0, 20
+; LA32-UAL-NEXT: ld.w $a4, $a1, 20
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.6: # %loadbb6
+; LA32-UAL-NEXT: ld.w $a3, $a0, 24
+; LA32-UAL-NEXT: ld.w $a4, $a1, 24
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.7: # %loadbb7
+; LA32-UAL-NEXT: ld.w $a0, $a0, 28
+; LA32-UAL-NEXT: ld.w $a1, $a1, 28
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.8:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB35_9: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_32:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a0, $a0, 24
+; LA64-UAL-NEXT: ld.d $a1, $a1, 24
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5
+; LA64-UAL-NEXT: # %bb.4:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB35_5: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_32:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 32
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_32:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 32
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: memcmp_size_63:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 63
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_63:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a2, $a0, 24
+; LA64-UAL-NEXT: ld.d $a3, $a1, 24
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.4: # %loadbb4
+; LA64-UAL-NEXT: ld.d $a2, $a0, 32
+; LA64-UAL-NEXT: ld.d $a3, $a1, 32
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.5: # %loadbb5
+; LA64-UAL-NEXT: ld.d $a2, $a0, 40
+; LA64-UAL-NEXT: ld.d $a3, $a1, 40
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.6: # %loadbb6
+; LA64-UAL-NEXT: ld.d $a2, $a0, 48
+; LA64-UAL-NEXT: ld.d $a3, $a1, 48
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.7: # %loadbb7
+; LA64-UAL-NEXT: ld.d $a0, $a0, 55
+; LA64-UAL-NEXT: ld.d $a1, $a1, 55
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.8:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB36_9: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_63:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 63
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: memcmp_size_64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 64
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_64:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a2, $a0, 24
+; LA64-UAL-NEXT: ld.d $a3, $a1, 24
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.4: # %loadbb4
+; LA64-UAL-NEXT: ld.d $a2, $a0, 32
+; LA64-UAL-NEXT: ld.d $a3, $a1, 32
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.5: # %loadbb5
+; LA64-UAL-NEXT: ld.d $a2, $a0, 40
+; LA64-UAL-NEXT: ld.d $a3, $a1, 40
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.6: # %loadbb6
+; LA64-UAL-NEXT: ld.d $a2, $a0, 48
+; LA64-UAL-NEXT: ld.d $a3, $a1, 48
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.7: # %loadbb7
+; LA64-UAL-NEXT: ld.d $a0, $a0, 56
+; LA64-UAL-NEXT: ld.d $a1, $a1, 56
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.8:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB37_9: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_64:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 64
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: memcmp_size_127:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 127
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_127:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 127
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: memcmp_size_128:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 128
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_128:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 128
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind {
+; LA32-LABEL: memcmp_size_runtime:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_runtime:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len)
+ ret i32 %memcmp
+}
+
+define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_eq_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltui $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_eq_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltui $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_eq_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: sltui $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_eq_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: sltui $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16)
+ %ret = icmp eq i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_lt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_lt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_lt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: srli.w $a0, $a0, 31
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_lt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_gt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_gt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_gt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: slt $a0, $zero, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_gt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slt $a0, $zero, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_le_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: xori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_le_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: xori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_le_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: slti $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_le_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %memcmp, 1
+ ret i1 %ret
+}
+
+define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_ge_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a0, $a1
+; LA32-UAL-NEXT: xori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_ge_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a0, $a1
+; LA64-UAL-NEXT: xori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_ge_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: addi.w $a1, $zero, -1
+; LA32-NUAL-NEXT: slt $a0, $a1, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_ge_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: addi.w $a1, $zero, -1
+; LA64-NUAL-NEXT: slt $a0, $a1, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %memcmp, -1
+ ret i1 %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/memcmp.ll b/llvm/test/CodeGen/LoongArch/memcmp.ll
index c4aaf9a..c3811c0 100644
--- a/llvm/test/CodeGen/LoongArch/memcmp.ll
+++ b/llvm/test/CodeGen/LoongArch/memcmp.ll
@@ -7,15 +7,24 @@
define signext i32 @test1(ptr %buffer1, ptr %buffer2) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: ori $a2, $zero, 16
-; CHECK-NEXT: pcaddu18i $ra, %call36(memcmp)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ld.d $a2, $a0, 0
+; CHECK-NEXT: ld.d $a3, $a1, 0
+; CHECK-NEXT: revb.d $a2, $a2
+; CHECK-NEXT: revb.d $a3, $a3
+; CHECK-NEXT: bne $a2, $a3, .LBB0_3
+; CHECK-NEXT: # %bb.1: # %loadbb1
+; CHECK-NEXT: ld.d $a0, $a0, 8
+; CHECK-NEXT: ld.d $a1, $a1, 8
+; CHECK-NEXT: revb.d $a2, $a0
+; CHECK-NEXT: revb.d $a3, $a1
+; CHECK-NEXT: bne $a2, $a3, .LBB0_3
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $a0, $zero
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_3: # %res_block
+; CHECK-NEXT: sltu $a0, $a2, $a3
+; CHECK-NEXT: sub.d $a0, $zero, $a0
+; CHECK-NEXT: ori $a0, $a0, 1
; CHECK-NEXT: ret
entry:
%call = call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll
new file mode 100644
index 0000000..9cfed6a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <2 x bfloat> @copysign_v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs)
+ ret <2 x bfloat> %r
+}
+
+define <4 x bfloat> @copysign_v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs)
+ ret <4 x bfloat> %r
+}
+
+define <8 x bfloat> @copysign_v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs)
+ ret <8 x bfloat> %r
+}
+
+define <16 x bfloat> @copysign_v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %r = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs)
+ ret <16 x bfloat> %r
+}
+
+define <32 x bfloat> @copysign_v32bf16(<32 x bfloat> %vm, <32 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v12
+; CHECK-NEXT: ret
+  %r = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> %vm, <32 x bfloat> %vs)
+ ret <32 x bfloat> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
index a2178e1..2455d87 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
@@ -1,8 +1,172 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
+
+define <2 x bfloat> @vfsgnj_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfsgnj_vv_v2bf16_unmasked(<2 x bfloat> %va, <2 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> splat (i1 true), i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfsgnj_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfsgnj_vv_v4bf16_unmasked(<4 x bfloat> %va, <4 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> splat (i1 true), i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfsgnj_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfsgnj_vv_v8bf16_unmasked(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> splat (i1 true), i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfsgnj_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfsgnj_vv_v16bf16_unmasked(<16 x bfloat> %va, <16 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> splat (i1 true), i32 %evl)
+ ret <16 x bfloat> %v
+}
declare <2 x half> @llvm.vp.copysign.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
@@ -311,10 +475,10 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
-; CHECK-NEXT: bltu a2, a1, .LBB26_2
+; CHECK-NEXT: bltu a2, a1, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: .LBB34_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t
; CHECK-NEXT: addi a0, a2, -16
@@ -346,10 +510,10 @@ define <32 x double> @vfsgnj_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: vle64.v v0, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a2
-; CHECK-NEXT: bltu a2, a1, .LBB27_2
+; CHECK-NEXT: bltu a2, a1, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v0
; CHECK-NEXT: addi a0, a2, -16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll
new file mode 100644
index 0000000..27c00de
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x bfloat> @v1bf16(<1 x bfloat> %v) {
+; CHECK-LABEL: v1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <1 x bfloat> @llvm.fabs.v1bf16(<1 x bfloat> %v)
+ ret <1 x bfloat> %r
+}
+
+define <2 x bfloat> @v2bf16(<2 x bfloat> %v) {
+; CHECK-LABEL: v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %v)
+ ret <2 x bfloat> %r
+}
+
+define <4 x bfloat> @v4bf16(<4 x bfloat> %v) {
+; CHECK-LABEL: v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> %v)
+ ret <4 x bfloat> %r
+}
+
+define <8 x bfloat> @v8bf16(<8 x bfloat> %v) {
+; CHECK-LABEL: v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %v)
+ ret <8 x bfloat> %r
+}
+
+define <16 x bfloat> @v16bf16(<16 x bfloat> %v) {
+; CHECK-LABEL: v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> %v)
+ ret <16 x bfloat> %r
+}
+
+define <32 x bfloat> @v32bf16(<32 x bfloat> %v) {
+; CHECK-LABEL: v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <32 x bfloat> @llvm.fabs.v32bf16(<32 x bfloat> %v)
+ ret <32 x bfloat> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
index 08f486b..01bd706 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
@@ -1,12 +1,224 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfabs_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfabs_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, <2 x i1> splat (i1 true), i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfabs_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfabs_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfabs_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfabs_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfabs_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfabs_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl)
+ ret <16 x bfloat> %v
+}
declare <2 x half> @llvm.vp.fabs.v2f16(<2 x half>, <2 x i1>, i32)
@@ -24,6 +236,14 @@ define <2 x half> @vfabs_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
@@ -42,6 +262,14 @@ define <2 x half> @vfabs_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
}
@@ -62,6 +290,14 @@ define <4 x half> @vfabs_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
@@ -80,6 +316,14 @@ define <4 x half> @vfabs_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
}
@@ -100,6 +344,14 @@ define <8 x half> @vfabs_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
@@ -118,6 +370,14 @@ define <8 x half> @vfabs_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
}
@@ -138,6 +398,14 @@ define <16 x half> @vfabs_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
@@ -156,6 +424,14 @@ define <16 x half> @vfabs_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
}
@@ -367,10 +643,10 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: bltu a0, a2, .LBB26_2
+; CHECK-NEXT: bltu a0, a2, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: .LBB34_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
@@ -390,10 +666,10 @@ define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: bltu a0, a2, .LBB27_2
+; CHECK-NEXT: bltu a0, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: addi a1, a0, -16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll
new file mode 100644
index 0000000..b3b9a62
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x bfloat> @v1bf16(<1 x bfloat> %va) {
+; CHECK-LABEL: v1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <1 x bfloat> %va
+ ret <1 x bfloat> %vb
+}
+
+define <2 x bfloat> @v2bf16(<2 x bfloat> %va) {
+; CHECK-LABEL: v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <2 x bfloat> %va
+ ret <2 x bfloat> %vb
+}
+
+define <4 x bfloat> @v4bf16(<4 x bfloat> %va) {
+; CHECK-LABEL: v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <4 x bfloat> %va
+ ret <4 x bfloat> %vb
+}
+
+define <8 x bfloat> @v8bf16(<8 x bfloat> %va) {
+; CHECK-LABEL: v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <8 x bfloat> %va
+ ret <8 x bfloat> %vb
+}
+
+define <16 x bfloat> @v16bf16(<16 x bfloat> %va) {
+; CHECK-LABEL: v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <16 x bfloat> %va
+ ret <16 x bfloat> %vb
+}
+
+define <32 x bfloat> @v32bf16(<32 x bfloat> %va) {
+; CHECK-LABEL: v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <32 x bfloat> %va
+ ret <32 x bfloat> %vb
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
index 968fd9f9..dede0e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
@@ -1,12 +1,208 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfneg_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfneg_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> splat (i1 true), i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfneg_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfneg_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfneg_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfneg_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfneg_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfneg_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl)
+ ret <16 x bfloat> %v
+}
declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32)
@@ -23,6 +219,13 @@ define <2 x half> @vfneg_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
@@ -40,6 +243,13 @@ define <2 x half> @vfneg_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
}
@@ -59,6 +269,13 @@ define <4 x half> @vfneg_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
@@ -76,6 +293,13 @@ define <4 x half> @vfneg_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
}
@@ -95,6 +319,13 @@ define <8 x half> @vfneg_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
@@ -112,6 +343,13 @@ define <8 x half> @vfneg_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
}
@@ -131,6 +369,13 @@ define <16 x half> @vfneg_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
@@ -148,6 +393,13 @@ define <16 x half> @vfneg_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
}
@@ -359,10 +611,10 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: bltu a0, a2, .LBB26_2
+; CHECK-NEXT: bltu a0, a2, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: .LBB34_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
@@ -382,10 +634,10 @@ define <32 x double> @vfneg_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: bltu a0, a2, .LBB27_2
+; CHECK-NEXT: bltu a0, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: addi a1, a0, -16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
index ccf82b9..2f5fde3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
@@ -1,12 +1,376 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <vscale x 1 x bfloat> @vfsgnj_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.copysign.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfsgnj_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv1bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.copysign.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfsgnj_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.copysign.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfsgnj_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.copysign.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfsgnj_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.copysign.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfsgnj_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.copysign.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfsgnj_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.copysign.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfsgnj_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.copysign.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfsgnj_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v12, v12, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v12, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.copysign.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfsgnj_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v12, v12, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.copysign.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfsgnj_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v16, v16, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v16, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.copysign.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfsgnj_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv32bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v16, v16, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.copysign.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -26,6 +390,16 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -46,6 +420,16 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16_unmasked(<vscale x 1 x half> %va,
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv1f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -68,6 +452,16 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -88,6 +482,16 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16_unmasked(<vscale x 2 x half> %va,
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -110,6 +514,16 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -130,6 +544,16 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -152,6 +576,16 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v10, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v10, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -172,6 +606,16 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v10, v10, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -194,6 +638,16 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v12, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v12, v12, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v12, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -214,6 +668,16 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16_unmasked(<vscale x 16 x half> %v
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v12, v12, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -236,6 +700,16 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v16, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v16, v16, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v16, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -256,6 +730,16 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv32f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v16, v16, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret <vscale x 32 x half> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
index 1d86388..28426ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
@@ -11,75 +11,165 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %v) {
-; CHECK-LABEL: nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 1 x bfloat> @llvm.fabs.nxv1bf16(<vscale x 1 x bfloat> %v)
ret <vscale x 1 x bfloat> %r
}
define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %v) {
-; CHECK-LABEL: nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> %v)
ret <vscale x 2 x bfloat> %r
}
define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %v) {
-; CHECK-LABEL: nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> %v)
ret <vscale x 4 x bfloat> %r
}
define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %v) {
-; CHECK-LABEL: nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> %v)
ret <vscale x 8 x bfloat> %r
}
define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %v) {
-; CHECK-LABEL: nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> %v)
ret <vscale x 16 x bfloat> %r
}
define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %v) {
-; CHECK-LABEL: nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 32 x bfloat> @llvm.fabs.nxv32bf16(<vscale x 32 x bfloat> %v)
ret <vscale x 32 x bfloat> %r
}
@@ -100,6 +190,14 @@ define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v)
ret <vscale x 1 x half> %r
}
@@ -120,6 +218,14 @@ define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v)
ret <vscale x 2 x half> %r
}
@@ -140,6 +246,14 @@ define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v)
ret <vscale x 4 x half> %r
}
@@ -160,6 +274,14 @@ define <vscale x 8 x half> @vfabs_nxv8f16(<vscale x 8 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %v)
ret <vscale x 8 x half> %r
}
@@ -180,6 +302,14 @@ define <vscale x 16 x half> @vfabs_nxv16f16(<vscale x 16 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> %v)
ret <vscale x 16 x half> %r
}
@@ -200,6 +330,14 @@ define <vscale x 32 x half> @vfabs_nxv32f16(<vscale x 32 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half> %v)
ret <vscale x 32 x half> %r
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
index 8f9f9c4..c6888c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
@@ -1,12 +1,328 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v,+experimental-zvfbfa -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v,+experimental-zvfbfa -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <vscale x 1 x bfloat> @vfabs_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fabs.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfabs_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv1bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv1bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv1bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fabs.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfabs_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fabs.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfabs_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fabs.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfabs_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fabs.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfabs_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fabs.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfabs_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fabs.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfabs_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv32bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv32bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fabs.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -24,6 +340,14 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -42,6 +366,14 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv1f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -62,6 +394,14 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -80,6 +420,14 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -100,6 +448,14 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -118,6 +474,14 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -138,6 +502,14 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -156,6 +528,14 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -176,6 +556,14 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -194,6 +582,14 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -214,6 +610,14 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -232,6 +636,14 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv32f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -473,10 +885,10 @@ define <vscale x 16 x double> @vfabs_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: and a2, a2, a3
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8, v0.t
@@ -495,10 +907,10 @@ define <vscale x 16 x double> @vfabs_vv_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
index 83f588c..bef2e8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
@@ -11,87 +11,189 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs) {
-; CHECK-LABEL: nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs)
ret <vscale x 1 x bfloat> %r
}
define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs) {
-; CHECK-LABEL: nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs)
ret <vscale x 2 x bfloat> %r
}
define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs) {
-; CHECK-LABEL: nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs)
ret <vscale x 4 x bfloat> %r
}
define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs) {
-; CHECK-LABEL: nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vand.vx v10, v10, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs)
ret <vscale x 8 x bfloat> %r
}
define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs) {
-; CHECK-LABEL: nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vand.vx v12, v12, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v12, v12, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%r = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs)
ret <vscale x 16 x bfloat> %r
}
define <vscale x 32 x bfloat> @nxv32bf32(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs) {
-; CHECK-LABEL: nxv32bf32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vand.vx v16, v16, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv32bf32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v16, v16, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv32bf32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv32bf32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%r = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf32(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs)
ret <vscale x 32 x bfloat> %r
}
@@ -114,6 +216,16 @@ define <vscale x 1 x half> @vfcopysign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsca
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %vs)
ret <vscale x 1 x half> %r
}
@@ -136,6 +248,18 @@ define <vscale x 1 x half> @vfcopysign_vf_nxv1f16(<vscale x 1 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %splat)
@@ -159,6 +283,17 @@ define <vscale x 1 x half> @vfcopynsign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsc
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 1 x half> %vs
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %n)
ret <vscale x 1 x half> %r
@@ -183,6 +318,19 @@ define <vscale x 1 x half> @vfcopynsign_vf_nxv1f16(<vscale x 1 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v9, v9, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%n = fneg <vscale x 1 x half> %splat
@@ -208,6 +356,17 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1 x
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vand.vx v9, v10, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%e = fptrunc <vscale x 1 x float> %vs to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
ret <vscale x 1 x half> %r
@@ -235,6 +394,19 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1 x
; ZVFHMIN-NEXT: vand.vx v9, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v9, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
%esplat = fptrunc <vscale x 1 x float> %splat to <vscale x 1 x half>
@@ -261,6 +433,18 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vxor.vx v9, v10, a0
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 1 x float> %vs
%eneg = fptrunc <vscale x 1 x float> %n to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %eneg)
@@ -290,6 +474,20 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vxor.vx v9, v10, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
%n = fneg <vscale x 1 x float> %splat
@@ -320,6 +518,19 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1 x
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%e = fptrunc <vscale x 1 x double> %vs to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
ret <vscale x 1 x half> %r
@@ -351,6 +562,21 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1 x
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
%esplat = fptrunc <vscale x 1 x double> %splat to <vscale x 1 x half>
@@ -381,6 +607,20 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 1 x double> %vs
%eneg = fptrunc <vscale x 1 x double> %n to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %eneg)
@@ -414,6 +654,22 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
%n = fneg <vscale x 1 x double> %splat
@@ -440,6 +696,16 @@ define <vscale x 2 x half> @vfcopysign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsca
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %vs)
ret <vscale x 2 x half> %r
}
@@ -462,6 +728,18 @@ define <vscale x 2 x half> @vfcopysign_vf_nxv2f16(<vscale x 2 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %splat)
@@ -485,6 +763,17 @@ define <vscale x 2 x half> @vfcopynsign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsc
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 2 x half> %vs
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %n)
ret <vscale x 2 x half> %r
@@ -509,6 +798,19 @@ define <vscale x 2 x half> @vfcopynsign_vf_nxv2f16(<vscale x 2 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v9, v9, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%n = fneg <vscale x 2 x half> %splat
@@ -534,6 +836,16 @@ define <vscale x 4 x half> @vfcopysign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsca
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %vs)
ret <vscale x 4 x half> %r
}
@@ -556,6 +868,18 @@ define <vscale x 4 x half> @vfcopysign_vf_nxv4f16(<vscale x 4 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %splat)
@@ -579,6 +903,17 @@ define <vscale x 4 x half> @vfcopynsign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsc
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 4 x half> %vs
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %n)
ret <vscale x 4 x half> %r
@@ -603,6 +938,19 @@ define <vscale x 4 x half> @vfcopynsign_vf_nxv4f16(<vscale x 4 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v9, v9, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%n = fneg <vscale x 4 x half> %splat
@@ -628,6 +976,16 @@ define <vscale x 8 x half> @vfcopysign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsca
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %vs)
ret <vscale x 8 x half> %r
}
@@ -650,6 +1008,18 @@ define <vscale x 8 x half> @vfcopysign_vf_nxv8f16(<vscale x 8 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v10, v10, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v10, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %splat)
@@ -673,6 +1043,17 @@ define <vscale x 8 x half> @vfcopynsign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsc
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 8 x half> %vs
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %n)
ret <vscale x 8 x half> %r
@@ -697,6 +1078,19 @@ define <vscale x 8 x half> @vfcopynsign_vf_nxv8f16(<vscale x 8 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v10, v10, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v10, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v10, v10, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%n = fneg <vscale x 8 x half> %splat
@@ -722,6 +1116,17 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8 x
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%e = fptrunc <vscale x 8 x float> %vs to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
ret <vscale x 8 x half> %r
@@ -749,6 +1154,19 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8 x
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v12, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
%esplat = fptrunc <vscale x 8 x float> %splat to <vscale x 8 x half>
@@ -775,6 +1193,18 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 8 x float> %vs
%eneg = fptrunc <vscale x 8 x float> %n to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %eneg)
@@ -804,6 +1234,20 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v12, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
%n = fneg <vscale x 8 x float> %splat
@@ -834,6 +1278,19 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8 x
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%e = fptrunc <vscale x 8 x double> %vs to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
ret <vscale x 8 x half> %r
@@ -865,6 +1322,21 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8 x
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
%esplat = fptrunc <vscale x 8 x double> %splat to <vscale x 8 x half>
@@ -895,6 +1367,20 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 8 x double> %vs
%eneg = fptrunc <vscale x 8 x double> %n to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %eneg)
@@ -928,6 +1414,22 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
%n = fneg <vscale x 8 x double> %splat
@@ -954,6 +1456,16 @@ define <vscale x 16 x half> @vfcopysign_vv_nxv16f16(<vscale x 16 x half> %vm, <v
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v12, v12, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %vs)
ret <vscale x 16 x half> %r
}
@@ -976,6 +1488,18 @@ define <vscale x 16 x half> @vfcopysign_vf_nxv16f16(<vscale x 16 x half> %vm, ha
; ZVFHMIN-NEXT: vand.vx v12, v12, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v12, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v12, v12, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %splat)
@@ -999,6 +1523,17 @@ define <vscale x 16 x half> @vfcopynsign_vv_nxv16f16(<vscale x 16 x half> %vm, <
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v12, v12, a0
+; ZVFBFA-NEXT: vand.vx v12, v12, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 16 x half> %vs
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %n)
ret <vscale x 16 x half> %r
@@ -1023,6 +1558,19 @@ define <vscale x 16 x half> @vfcopynsign_vf_nxv16f16(<vscale x 16 x half> %vm, h
; ZVFHMIN-NEXT: vand.vx v12, v12, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v12, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v12, v12, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v12, v12, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
%n = fneg <vscale x 16 x half> %splat
@@ -1048,6 +1596,16 @@ define <vscale x 32 x half> @vfcopysign_vv_nxv32f16(<vscale x 32 x half> %vm, <v
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v16, v16, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %vs)
ret <vscale x 32 x half> %r
}
@@ -1070,6 +1628,18 @@ define <vscale x 32 x half> @vfcopysign_vf_nxv32f16(<vscale x 32 x half> %vm, ha
; ZVFHMIN-NEXT: vand.vx v16, v16, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v16, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v16, v16, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %splat)
@@ -1093,6 +1663,17 @@ define <vscale x 32 x half> @vfcopynsign_vv_nxv32f16(<vscale x 32 x half> %vm, <
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vxor.vx v16, v16, a0
+; ZVFBFA-NEXT: vand.vx v16, v16, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 32 x half> %vs
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %n)
ret <vscale x 32 x half> %r
@@ -1117,6 +1698,19 @@ define <vscale x 32 x half> @vfcopynsign_vf_nxv32f16(<vscale x 32 x half> %vm, h
; ZVFHMIN-NEXT: vand.vx v16, v16, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v16, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v16, v16, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v16, v16, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
%n = fneg <vscale x 32 x half> %splat
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
index 9f456e9..c0b4916 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
@@ -11,69 +11,153 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %va) {
-; CHECK-LABEL: nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 1 x bfloat> %va
ret <vscale x 1 x bfloat> %vb
}
define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %va) {
-; CHECK-LABEL: nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 2 x bfloat> %va
ret <vscale x 2 x bfloat> %vb
}
define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %va) {
-; CHECK-LABEL: nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 4 x bfloat> %va
ret <vscale x 4 x bfloat> %vb
}
define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %va) {
-; CHECK-LABEL: nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 8 x bfloat> %va
ret <vscale x 8 x bfloat> %vb
}
define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %va) {
-; CHECK-LABEL: nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 16 x bfloat> %va
ret <vscale x 16 x bfloat> %vb
}
define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %va) {
-; CHECK-LABEL: nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 32 x bfloat> %va
ret <vscale x 32 x bfloat> %vb
}
@@ -91,6 +175,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 1 x half> %va
ret <vscale x 1 x half> %vb
}
@@ -108,6 +199,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 2 x half> %va
ret <vscale x 2 x half> %vb
}
@@ -125,6 +223,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 4 x half> %va
ret <vscale x 4 x half> %vb
}
@@ -142,6 +247,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 8 x half> %va
ret <vscale x 8 x half> %vb
}
@@ -159,6 +271,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 16 x half> %va
ret <vscale x 16 x half> %vb
}
@@ -176,6 +295,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 32 x half> %va
ret <vscale x 32 x half> %vb
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
index bbab056..9bd24c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
@@ -1,12 +1,304 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv1bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfneg_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfneg_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfneg_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfneg_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfneg_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfneg_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfneg_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfneg_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfneg_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fneg.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfneg_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv32bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fneg.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -23,6 +315,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -40,6 +339,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -59,6 +365,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -76,6 +389,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -95,6 +415,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -112,6 +439,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -131,6 +465,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -148,6 +489,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -167,6 +515,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -184,6 +539,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -203,6 +565,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -220,6 +589,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -461,10 +837,10 @@ define <vscale x 16 x double> @vfneg_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: and a2, a2, a3
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8, v0.t
@@ -483,10 +859,10 @@ define <vscale x 16 x double> @vfneg_vv_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_faddfsub_bfloat16.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_faddfsub_bfloat16.ll
new file mode 100644
index 0000000..a189b2a
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_faddfsub_bfloat16.ll
@@ -0,0 +1,34 @@
+; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16 %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR1
+; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_KHR_bfloat16 %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR2
+
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_INTEL_16bit_atomics,+SPV_KHR_bfloat16,+SPV_INTEL_bfloat16_arithmetic %s -o - | FileCheck %s
+
+; CHECK-ERROR1: LLVM ERROR: The atomic float instruction requires the following SPIR-V extension: SPV_EXT_shader_atomic_float_add
+; CHECK-ERROR2: LLVM ERROR: The atomic bfloat16 instruction requires the following SPIR-V extension: SPV_INTEL_16bit_atomics
+
+; CHECK: Capability BFloat16TypeKHR
+; CHECK: Capability AtomicBFloat16AddINTEL
+; CHECK: Extension "SPV_KHR_bfloat16"
+; CHECK: Extension "SPV_EXT_shader_atomic_float_add"
+; CHECK: Extension "SPV_INTEL_16bit_atomics"
+; CHECK-DAG: %[[TyBF16:[0-9]+]] = OpTypeFloat 16 0
+; CHECK-DAG: %[[TyBF16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyBF16]]
+; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0
+; CHECK-DAG: %[[ConstBF16:[0-9]+]] = OpConstant %[[TyBF16]] 16936{{$}}
+; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstantNull %[[TyBF16]]
+; CHECK-DAG: %[[BF16Ptr:[0-9]+]] = OpVariable %[[TyBF16Ptr]] CrossWorkgroup %[[Const0]]
+; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]]
+; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16{{$}}
+; CHECK: OpAtomicFAddEXT %[[TyBF16]] %[[BF16Ptr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstBF16]]
+; CHECK: %[[NegatedConstBF16:[0-9]+]] = OpFNegate %[[TyBF16]] %[[ConstBF16]]
+; CHECK: OpAtomicFAddEXT %[[TyBF16]] %[[BF16Ptr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[NegatedConstBF16]]
+
+
+@f = common dso_local local_unnamed_addr addrspace(1) global bfloat 0.000000e+00, align 8
+
+define dso_local spir_func void @test1() local_unnamed_addr {
+entry:
+ %addval = atomicrmw fadd ptr addrspace(1) @f, bfloat 42.000000e+00 seq_cst
+ %subval = atomicrmw fsub ptr addrspace(1) @f, bfloat 42.000000e+00 seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_fminfmax_bfloat16.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_fminfmax_bfloat16.ll
new file mode 100644
index 0000000..dd84480
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_fminfmax_bfloat16.ll
@@ -0,0 +1,28 @@
+; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_EXT_shader_atomic_float_min_max,+SPV_KHR_bfloat16 %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_EXT_shader_atomic_float_min_max,+SPV_INTEL_16bit_atomics,+SPV_KHR_bfloat16 %s -o - | FileCheck %s
+
+; CHECK-ERROR: LLVM ERROR: The atomic bfloat16 instruction requires the following SPIR-V extension: SPV_INTEL_16bit_atomics
+
+; CHECK: Capability AtomicBFloat16MinMaxINTEL
+; CHECK: Extension "SPV_KHR_bfloat16"
+; CHECK: Extension "SPV_EXT_shader_atomic_float_min_max"
+; CHECK: Extension "SPV_INTEL_16bit_atomics"
+; CHECK-DAG: %[[TyBF16:[0-9]+]] = OpTypeFloat 16 0
+; CHECK-DAG: %[[TyBF16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyBF16]]
+; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0
+; CHECK-DAG: %[[ConstBF16:[0-9]+]] = OpConstant %[[TyBF16]] 16936{{$}}
+; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstantNull %[[TyBF16]]
+; CHECK-DAG: %[[BF16Ptr:[0-9]+]] = OpVariable %[[TyBF16Ptr]] CrossWorkgroup %[[Const0]]
+; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]]
+; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16{{$}}
+; CHECK: OpAtomicFMinEXT %[[TyBF16]] %[[BF16Ptr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstBF16]]
+; CHECK: OpAtomicFMaxEXT %[[TyBF16]] %[[BF16Ptr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstBF16]]
+
+@f = common dso_local local_unnamed_addr addrspace(1) global bfloat 0.000000e+00, align 8
+
+define spir_func void @test1() {
+entry:
+ %minval = atomicrmw fmin ptr addrspace(1) @f, bfloat 42.0e+00 seq_cst
+ %maxval = atomicrmw fmax ptr addrspace(1) @f, bfloat 42.0e+00 seq_cst
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll b/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll
index 9531323..8ceb310 100644
--- a/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll
+++ b/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll
@@ -1,15 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -p instcombine -S %s | FileCheck %s
-define i64 @test_sink_with_dereferenceable_assume(ptr %p, ptr %q, i1 %cond) {
-; CHECK-LABEL: define i64 @test_sink_with_dereferenceable_assume(
-; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[COND:%.*]]) {
+define i64 @test_dereferenceable_assume(ptr %p, ptr %q, i1 %c.0) {
+; CHECK-LABEL: define i64 @test_dereferenceable_assume(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
+; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: ret i64 [[DIFF]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %p_int = ptrtoint ptr %p to i64
+ %q_int = ptrtoint ptr %q to i64
+ %diff = sub i64 %q_int, %p_int
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
+ br i1 %c.0, label %then, label %else
+
+then:
+ ret i64 %diff
+
+else:
+ ret i64 0
+}
+
+define i64 @test_sink_with_dereferenceable_assume_same_block_as_user(ptr %p, ptr %q, i1 %c.0) {
+; CHECK-LABEL: define i64 @test_sink_with_dereferenceable_assume_same_block_as_user(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]]
; CHECK: [[THEN]]:
; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
; CHECK-NEXT: ret i64 [[DIFF]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: ret i64 0
@@ -18,14 +47,77 @@ entry:
%p_int = ptrtoint ptr %p to i64
%q_int = ptrtoint ptr %q to i64
%diff = sub i64 %q_int, %p_int
- call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
- br i1 %cond, label %then, label %else
+ br i1 %c.0, label %then, label %else
then:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
ret i64 %diff
else:
ret i64 0
}
+define i64 @test_sink_with_multiple_users_dominated_by_deref(ptr %p, ptr %q, i1 %c.0, i1 %c.1) {
+; CHECK-LABEL: define i64 @test_sink_with_multiple_users_dominated_by_deref(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]], i1 [[C_1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
+; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
+; CHECK-NEXT: br i1 [[C_1]], label %[[THEN_2:.*]], label %[[ELSE]]
+; CHECK: [[THEN_2]]:
+; CHECK-NEXT: [[DOUBLED:%.*]] = shl i64 [[DIFF]], 1
+; CHECK-NEXT: ret i64 [[DOUBLED]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %p_int = ptrtoint ptr %p to i64
+ %q_int = ptrtoint ptr %q to i64
+ %diff = sub i64 %q_int, %p_int
+ br i1 %c.0, label %then, label %else
+
+then:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
+ br i1 %c.1, label %then.2, label %else
+
+then.2:
+ %doubled = mul i64 %diff, 2
+ ret i64 %doubled
+
+else:
+ ret i64 0
+}
+
+define i64 @test_deref_user_does_not_dominate_other_user(ptr %p, ptr %q, i1 %c.0) {
+; CHECK-LABEL: define i64 @test_deref_user_does_not_dominate_other_user(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
+; CHECK-NEXT: br i1 [[C_0]], label %[[MIDDLE:.*]], label %[[EXIT:.*]]
+; CHECK: [[MIDDLE]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i64 [[DIFF]]
+;
+entry:
+ %p_int = ptrtoint ptr %p to i64
+ %q_int = ptrtoint ptr %q to i64
+ %diff = sub i64 %q_int, %p_int
+ br i1 %c.0, label %middle, label %exit
+
+middle:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
+ br label %exit
+
+exit:
+ ret i64 %diff
+}
+
declare void @llvm.assume(i1 noundef)
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
index 33e3e83..e914979 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
@@ -133,75 +133,65 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) {
; CHECK-LABEL: define noundef ptr @std_find_caller(
; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64
+; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST3]]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST]], i64 [[PTR_SUB]]) ]
; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]]
; CHECK: [[LOOP_HEADER_I_PREHEADER]]:
-; CHECK-NEXT: [[LAST2:%.*]] = ptrtoint ptr [[LAST]] to i64
-; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64
-; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64
-; CHECK-NEXT: [[FIRST1:%.*]] = ptrtoint ptr [[FIRST]] to i64
-; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST1]]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[PTR_SUB]]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST2]], -2
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST3]]
; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP1]], 6
-; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP4]], 6
-; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[LOOP_HEADER_I_PROL_LOOPEXIT:.*]], label %[[LOOP_HEADER_I_PROL:.*]]
-; CHECK: [[LOOP_HEADER_I_PROL]]:
-; CHECK-NEXT: [[PTR_IV_I_PROL:%.*]] = phi ptr [ [[PTR_IV_NEXT_I_PROL:%.*]], %[[LOOP_LATCH_I_PROL:.*]] ], [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ]
-; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_I_PROL]] ], [ 0, %[[LOOP_HEADER_I_PREHEADER]] ]
-; CHECK-NEXT: [[L_I_PROL:%.*]] = load i16, ptr [[PTR_IV_I_PROL]], align 2
-; CHECK-NEXT: [[C_1_I_PROL:%.*]] = icmp eq i16 [[L_I_PROL]], 1
-; CHECK-NEXT: br i1 [[C_1_I_PROL]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_LATCH_I_PROL]]
-; CHECK: [[LOOP_LATCH_I_PROL]]:
-; CHECK-NEXT: [[PTR_IV_NEXT_I_PROL]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I_PROL]], i64 2
-; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 158
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_HEADER_I_PREHEADER2:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], -8
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[PROL_ITER_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <8 x i16> [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD_FR]], splat (i16 1)
+; CHECK-NEXT: [[PROL_ITER_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i8 [[TMP5]], 0
; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[LOOP_HEADER_I_PROL_LOOPEXIT]], label %[[LOOP_HEADER_I_PROL]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: [[LOOP_HEADER_I_PROL_LOOPEXIT]]:
-; CHECK-NEXT: [[PTR_IV_I_UNR:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[PTR_IV_NEXT_I_PROL]], %[[LOOP_LATCH_I_PROL]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP1]], 6
-; CHECK-NEXT: br i1 [[TMP5]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I:.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[PROL_ITER_CMP_NOT]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[XTRAITER]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP9]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I_PREHEADER2]]
+; CHECK: [[LOOP_HEADER_I_PREHEADER2]]:
+; CHECK-NEXT: [[PTR_IV_I_PH:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER_I:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP4]], i1 true)
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
+; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]]
; CHECK: [[LOOP_HEADER_I]]:
-; CHECK-NEXT: [[PTR_IV_I:%.*]] = phi ptr [ [[PTR_IV_NEXT_I_3:%.*]], %[[LOOP_LATCH_I_3:.*]] ], [ [[PTR_IV_I_UNR]], %[[LOOP_HEADER_I_PROL_LOOPEXIT]] ]
+; CHECK-NEXT: [[PTR_IV_I:%.*]] = phi ptr [ [[PTR_IV_NEXT_I:%.*]], %[[LOOP_LATCH_I:.*]] ], [ [[PTR_IV_I_PH]], %[[LOOP_HEADER_I_PREHEADER2]] ]
; CHECK-NEXT: [[L_I:%.*]] = load i16, ptr [[PTR_IV_I]], align 2
; CHECK-NEXT: [[C_1_I:%.*]] = icmp eq i16 [[L_I]], 1
-; CHECK-NEXT: br i1 [[C_1_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_LATCH_I:.*]]
+; CHECK-NEXT: br i1 [[C_1_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_LATCH_I]]
; CHECK: [[LOOP_LATCH_I]]:
-; CHECK-NEXT: [[PTR_IV_NEXT_I:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 2
-; CHECK-NEXT: [[L_I_1:%.*]] = load i16, ptr [[PTR_IV_NEXT_I]], align 2
-; CHECK-NEXT: [[C_1_I_1:%.*]] = icmp eq i16 [[L_I_1]], 1
-; CHECK-NEXT: br i1 [[C_1_I_1]], label %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT11:.*]], label %[[LOOP_LATCH_I_1:.*]]
-; CHECK: [[LOOP_LATCH_I_1]]:
-; CHECK-NEXT: [[PTR_IV_NEXT_I_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 4
-; CHECK-NEXT: [[L_I_2:%.*]] = load i16, ptr [[PTR_IV_NEXT_I_1]], align 2
-; CHECK-NEXT: [[C_1_I_2:%.*]] = icmp eq i16 [[L_I_2]], 1
-; CHECK-NEXT: br i1 [[C_1_I_2]], label %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT9:.*]], label %[[LOOP_LATCH_I_2:.*]]
-; CHECK: [[LOOP_LATCH_I_2]]:
-; CHECK-NEXT: [[PTR_IV_NEXT_I_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 6
-; CHECK-NEXT: [[L_I_3:%.*]] = load i16, ptr [[PTR_IV_NEXT_I_2]], align 2
-; CHECK-NEXT: [[C_1_I_3:%.*]] = icmp eq i16 [[L_I_3]], 1
-; CHECK-NEXT: br i1 [[C_1_I_3]], label %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT7:.*]], label %[[LOOP_LATCH_I_3]]
-; CHECK: [[LOOP_LATCH_I_3]]:
-; CHECK-NEXT: [[PTR_IV_NEXT_I_3]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 8
-; CHECK-NEXT: [[EC_I_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT_I_3]], [[LAST]]
-; CHECK-NEXT: br i1 [[EC_I_3]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I]]
-; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT7]]:
-; CHECK-NEXT: [[PTR_IV_NEXT_I_2_LE:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 6
-; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]]
-; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT9]]:
-; CHECK-NEXT: [[PTR_IV_NEXT_I_1_LE:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 4
-; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]]
-; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT11]]:
-; CHECK-NEXT: [[PTR_IV_NEXT_I_LE:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 2
-; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]]
+; CHECK-NEXT: [[PTR_IV_NEXT_I]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 2
+; CHECK-NEXT: [[EC_I:%.*]] = icmp eq ptr [[PTR_IV_NEXT_I]], [[LAST]]
+; CHECK-NEXT: br i1 [[EC_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT]]:
-; CHECK-NEXT: [[RES_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[LOOP_HEADER_I_PROL_LOOPEXIT]] ], [ [[PTR_IV_NEXT_I_2_LE]], %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT7]] ], [ [[PTR_IV_NEXT_I_1_LE]], %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT9]] ], [ [[PTR_IV_NEXT_I_LE]], %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT11]] ], [ [[SCEVGEP]], %[[LOOP_LATCH_I_3]] ], [ [[PTR_IV_I]], %[[LOOP_HEADER_I]] ], [ [[PTR_IV_I_PROL]], %[[LOOP_HEADER_I_PROL]] ]
+; CHECK-NEXT: [[RES_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ], [ [[SCEVGEP]], %[[LOOP_LATCH_I]] ], [ [[PTR_IV_I]], %[[LOOP_HEADER_I]] ]
; CHECK-NEXT: ret ptr [[RES_I]]
;
entry:
@@ -241,6 +231,6 @@ declare void @llvm.assume(i1 noundef)
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
-; CHECK: [[META4]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/tools/llc/save-stats.ll b/llvm/test/tools/llc/save-stats.ll
new file mode 100644
index 0000000..acb0367
--- /dev/null
+++ b/llvm/test/tools/llc/save-stats.ll
@@ -0,0 +1,16 @@
+; REQUIRES: asserts
+; REQUIRES: aarch64-registered-target
+
+; RUN: llc -mtriple=arm64-apple-macosx --save-stats=obj -o %t.s %s && cat %t.stats | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-macosx --save-stats=cwd -o %t.s %s && cat %{t:stem}.tmp.stats | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-macosx --save-stats -o %t.s %s && cat %{t:stem}.tmp.stats | FileCheck %s
+; RUN: not llc -mtriple=arm64-apple-macosx --save-stats=invalid -o %t.s %s 2>&1 | FileCheck %s --check-prefix=INVALID_ARG
+
+; CHECK: {
+; CHECK: "asm-printer.EmittedInsts":
+; CHECK: }
+
+; INVALID_ARG: {{.*}}llc{{.*}}: for the --save-stats option: Cannot find option named 'invalid'!
+define i32 @func() {
+ ret i32 0
+}
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index ee1e906..1fc5bba 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -90,7 +90,6 @@
#include <cstdlib>
#include <cstring>
#include <limits>
-#include <map>
#include <memory>
#include <optional>
#include <string>
diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp
index 256933d..06045a6 100644
--- a/llvm/tools/gold/gold-plugin.cpp
+++ b/llvm/tools/gold/gold-plugin.cpp
@@ -36,7 +36,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Host.h"
#include <list>
-#include <map>
#include <plugin-api.h>
#include <string>
#include <system_error>
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 152f7db..dc2f878 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -15,6 +15,7 @@
#include "NewPMDriver.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
@@ -45,6 +46,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/PGOOptions.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
@@ -57,6 +59,7 @@
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include <cassert>
#include <memory>
#include <optional>
using namespace llvm;
@@ -208,6 +211,20 @@ static cl::opt<std::string> RemarksFormat(
cl::desc("The format used for serializing remarks (default: YAML)"),
cl::value_desc("format"), cl::init("yaml"));
+enum SaveStatsMode { None, Cwd, Obj };
+
+static cl::opt<SaveStatsMode> SaveStats(
+ "save-stats",
+ cl::desc("Save LLVM statistics to a file in the current directory"
+ "(`-save-stats`/`-save-stats=cwd`) or the directory of the output"
+ "file (`-save-stats=obj`). (default: cwd)"),
+ cl::values(clEnumValN(SaveStatsMode::Cwd, "cwd",
+ "Save to the current working directory"),
+ clEnumValN(SaveStatsMode::Cwd, "", ""),
+ clEnumValN(SaveStatsMode::Obj, "obj",
+ "Save to the output file directory")),
+ cl::init(SaveStatsMode::None), cl::ValueOptional);
+
static cl::opt<bool> EnableNewPassManager(
"enable-new-pm", cl::desc("Enable the new pass manager"), cl::init(false));
@@ -281,7 +298,8 @@ static void setPGOOptions(TargetMachine &TM) {
TM.setPGOOption(PGOOpt);
}
-static int compileModule(char **, LLVMContext &);
+static int compileModule(char **argv, LLVMContext &Context,
+ std::string &OutputFilename);
[[noreturn]] static void reportError(Twine Msg, StringRef Filename = "") {
SmallString<256> Prefix;
@@ -360,6 +378,45 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
return FDOut;
}
+static int MaybeEnableStats() {
+ if (SaveStats == SaveStatsMode::None)
+ return 0;
+
+ llvm::EnableStatistics(false);
+ return 0;
+}
+
+static int MaybeSaveStats(std::string &&OutputFilename) {
+ if (SaveStats == SaveStatsMode::None)
+ return 0;
+
+ SmallString<128> StatsFilename;
+ if (SaveStats == SaveStatsMode::Obj) {
+ StatsFilename = OutputFilename;
+ llvm::sys::path::remove_filename(StatsFilename);
+ } else {
+ assert(SaveStats == SaveStatsMode::Cwd &&
+ "Should have been a valid --save-stats value");
+ }
+
+ auto BaseName = llvm::sys::path::filename(OutputFilename);
+ llvm::sys::path::append(StatsFilename, BaseName);
+ llvm::sys::path::replace_extension(StatsFilename, "stats");
+
+ auto FileFlags = llvm::sys::fs::OF_TextWithCRLF;
+ std::error_code EC;
+ auto StatsOS =
+ std::make_unique<llvm::raw_fd_ostream>(StatsFilename, EC, FileFlags);
+ if (EC) {
+ WithColor::error(errs(), "llc")
+ << "Unable to open statistics file: " << EC.message() << "\n";
+ return 1;
+ }
+
+ llvm::PrintStatisticsJSON(*StatsOS);
+ return 0;
+}
+
// main - Entry point for the llc compiler.
//
int main(int argc, char **argv) {
@@ -437,18 +494,23 @@ int main(int argc, char **argv) {
reportError(std::move(E), RemarksFilename);
LLVMRemarkFileHandle RemarksFile = std::move(*RemarksFileOrErr);
+ if (int RetVal = MaybeEnableStats())
+ return RetVal;
+ std::string OutputFilename;
+
if (InputLanguage != "" && InputLanguage != "ir" && InputLanguage != "mir")
reportError("input language must be '', 'IR' or 'MIR'");
// Compile the module TimeCompilations times to give better compile time
// metrics.
for (unsigned I = TimeCompilations; I; --I)
- if (int RetVal = compileModule(argv, Context))
+ if (int RetVal = compileModule(argv, Context, OutputFilename))
return RetVal;
if (RemarksFile)
RemarksFile->keep();
- return 0;
+
+ return MaybeSaveStats(std::move(OutputFilename));
}
static bool addPass(PassManagerBase &PM, const char *argv0, StringRef PassName,
@@ -480,7 +542,8 @@ static bool addPass(PassManagerBase &PM, const char *argv0, StringRef PassName,
return false;
}
-static int compileModule(char **argv, LLVMContext &Context) {
+static int compileModule(char **argv, LLVMContext &Context,
+ std::string &OutputFilename) {
// Load the module to be compiled...
SMDiagnostic Err;
std::unique_ptr<Module> M;
@@ -664,6 +727,9 @@ static int compileModule(char **argv, LLVMContext &Context) {
// Ensure the filename is passed down to CodeViewDebug.
Target->Options.ObjectFilenameForDebug = Out->outputFilename();
+ // Return a copy of the output filename via the output parameter.
+ OutputFilename = Out->outputFilename();
+
// Tell target that this tool is not necessarily used with argument ABI
// compliance (i.e. narrow integer argument extensions).
Target->Options.VerifyArgABICompliance = 0;
diff --git a/llvm/tools/llvm-cfi-verify/lib/GraphBuilder.h b/llvm/tools/llvm-cfi-verify/lib/GraphBuilder.h
index 55e628a..4ee3e7c 100644
--- a/llvm/tools/llvm-cfi-verify/lib/GraphBuilder.h
+++ b/llvm/tools/llvm-cfi-verify/lib/GraphBuilder.h
@@ -37,7 +37,6 @@
#include "llvm/Support/raw_ostream.h"
#include <functional>
-#include <set>
using Instr = llvm::cfi_verify::FileAnalysis::Instr;
diff --git a/llvm/tools/llvm-ifs/llvm-ifs.cpp b/llvm/tools/llvm-ifs/llvm-ifs.cpp
index e12016c..1244bfb 100644
--- a/llvm/tools/llvm-ifs/llvm-ifs.cpp
+++ b/llvm/tools/llvm-ifs/llvm-ifs.cpp
@@ -34,7 +34,6 @@
#include "llvm/TextAPI/TextAPIReader.h"
#include "llvm/TextAPI/TextAPIWriter.h"
#include <optional>
-#include <set>
#include <string>
#include <vector>
diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
index 09f7142..30f2e53 100644
--- a/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -45,14 +45,13 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
-#include <map>
#include <memory>
#include <string>
#include <system_error>
diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.h b/llvm/tools/llvm-rc/ResourceFileWriter.h
index 82d3e3b..a13af45 100644
--- a/llvm/tools/llvm-rc/ResourceFileWriter.h
+++ b/llvm/tools/llvm-rc/ResourceFileWriter.h
@@ -19,6 +19,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
+#include <map>
+
namespace llvm {
class MemoryBuffer;
diff --git a/llvm/tools/llvm-rc/ResourceScriptToken.h b/llvm/tools/llvm-rc/ResourceScriptToken.h
index 50ef8e4..4c839a0 100644
--- a/llvm/tools/llvm-rc/ResourceScriptToken.h
+++ b/llvm/tools/llvm-rc/ResourceScriptToken.h
@@ -28,7 +28,6 @@
#include "llvm/Support/Error.h"
#include <cstdint>
-#include <map>
#include <vector>
namespace llvm {
diff --git a/llvm/unittests/Analysis/IR2VecTest.cpp b/llvm/unittests/Analysis/IR2VecTest.cpp
index 8ffc5f6..1aa0e42 100644
--- a/llvm/unittests/Analysis/IR2VecTest.cpp
+++ b/llvm/unittests/Analysis/IR2VecTest.cpp
@@ -18,7 +18,6 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include <map>
#include <vector>
using namespace llvm;
diff --git a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp
index ec94083..13070e8 100644
--- a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp
@@ -15,7 +15,6 @@
#include "llvm/Testing/Support/Error.h"
#include <deque>
-#include <set>
#include <thread>
using namespace llvm;
diff --git a/llvm/unittests/Support/ParallelTest.cpp b/llvm/unittests/Support/ParallelTest.cpp
index 041067d..c7ecc4e 100644
--- a/llvm/unittests/Support/ParallelTest.cpp
+++ b/llvm/unittests/Support/ParallelTest.cpp
@@ -15,7 +15,6 @@
#include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
#include "llvm/Support/ThreadPool.h"
#include "gtest/gtest.h"
-#include <array>
#include <random>
uint32_t array[1024 * 1024];
diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp
index 65052e7..caa7a6f 100644
--- a/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/llvm/utils/TableGen/DFAEmitter.cpp
@@ -32,7 +32,6 @@
#include <cassert>
#include <cstdint>
#include <deque>
-#include <map>
#include <set>
#include <string>
#include <variant>
diff --git a/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp b/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp
index 4379c78..c7ab9cc 100644
--- a/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp
+++ b/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include <algorithm>
-#include <array>
#include <deque>
#include <fstream>
#include <memory>
diff --git a/llvm/utils/gn/secondary/bolt/lib/Rewrite/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Rewrite/BUILD.gn
index 764ebb9..4ab1c95 100644
--- a/llvm/utils/gn/secondary/bolt/lib/Rewrite/BUILD.gn
+++ b/llvm/utils/gn/secondary/bolt/lib/Rewrite/BUILD.gn
@@ -34,6 +34,7 @@ static_library("Rewrite") {
"MachORewriteInstance.cpp",
"MetadataManager.cpp",
"PseudoProbeRewriter.cpp",
+ "RSeqRewriter.cpp",
"RewriteInstance.cpp",
"SDTRewriter.cpp",
]
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
index 9c64db5..b01cfb9 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
@@ -30,6 +30,7 @@ static_library("bugprone") {
"CommandProcessorCheck.cpp",
"ComparePointerToMemberVirtualFunctionCheck.cpp",
"CopyConstructorInitCheck.cpp",
+ "CopyConstructorMutatesArgumentCheck.cpp",
"CrtpConstructorAccessibilityCheck.cpp",
"DanglingHandleCheck.cpp",
"DefaultOperatorNewOnOveralignedTypeCheck.cpp",
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn
index 16f914a..18708f6 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn
@@ -17,7 +17,6 @@ static_library("cert") {
sources = [
"CERTTidyModule.cpp",
"LimitedRandomnessCheck.cpp",
- "MutatingCopyCheck.cpp",
"ProperlySeededRandomGeneratorCheck.cpp",
"ThrownExceptionTypeCheck.cpp",
]
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn
index da380c9..52311b2 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn
@@ -33,7 +33,7 @@ static_library("cppcoreguidelines") {
"OwningMemoryCheck.cpp",
"PreferMemberInitializerCheck.cpp",
"ProBoundsArrayToPointerDecayCheck.cpp",
- "ProBoundsAvoidUncheckedContainerAccess.cpp",
+ "ProBoundsAvoidUncheckedContainerAccessCheck.cpp",
"ProBoundsConstantArrayIndexCheck.cpp",
"ProBoundsPointerArithmeticCheck.cpp",
"ProTypeConstCastCheck.cpp",
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn
index a6848b3..72b6188 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn
@@ -39,12 +39,12 @@ static_library("misc") {
"HeaderIncludeCycleCheck.cpp",
"IncludeCleanerCheck.cpp",
"MiscTidyModule.cpp",
- "MisleadingBidirectional.cpp",
- "MisleadingIdentifier.cpp",
+ "MisleadingBidirectionalCheck.cpp",
+ "MisleadingIdentifierCheck.cpp",
"MisplacedConstCheck.cpp",
"NewDeleteOverloadsCheck.cpp",
"NoRecursionCheck.cpp",
- "NonCopyableObjects.cpp",
+ "NonCopyableObjectsCheck.cpp",
"NonPrivateMemberVariablesInClassesCheck.cpp",
"OverrideWithDifferentVisibilityCheck.cpp",
"RedundantExpressionCheck.cpp",
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/objc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/objc/BUILD.gn
index e75b376..bc82225 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/objc/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/objc/BUILD.gn
@@ -11,7 +11,7 @@ static_library("objc") {
"//llvm/lib/Support",
]
sources = [
- "AssertEquals.cpp",
+ "AssertEqualsCheck.cpp",
"AvoidNSErrorInitCheck.cpp",
"DeallocInCategoryCheck.cpp",
"ForbiddenSubclassingCheck.cpp",
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/performance/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/performance/BUILD.gn
index 5a29775..74de89f 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/performance/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/performance/BUILD.gn
@@ -31,7 +31,7 @@ static_library("performance") {
"PerformanceTidyModule.cpp",
"TriviallyDestructibleCheck.cpp",
"TypePromotionInMathFnCheck.cpp",
- "UnnecessaryCopyInitialization.cpp",
+ "UnnecessaryCopyInitializationCheck.cpp",
"UnnecessaryValueParamCheck.cpp",
]
}
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
index 3b0f38a..4fe3740 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
@@ -13,7 +13,7 @@ static_library("readability") {
]
sources = [
"AmbiguousSmartptrResetCallCheck.cpp",
- "AvoidConstParamsInDecls.cpp",
+ "AvoidConstParamsInDeclsCheck.cpp",
"AvoidNestedConditionalOperatorCheck.cpp",
"AvoidReturnWithVoidValueCheck.cpp",
"AvoidUnconditionalPreprocessorIfCheck.cpp",
@@ -22,7 +22,7 @@ static_library("readability") {
"ContainerContainsCheck.cpp",
"ContainerDataPointerCheck.cpp",
"ContainerSizeEmptyCheck.cpp",
- "ConvertMemberFunctionsToStatic.cpp",
+ "ConvertMemberFunctionsToStaticCheck.cpp",
"DeleteNullPointerCheck.cpp",
"DuplicateIncludeCheck.cpp",
"ElseAfterReturnCheck.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn
index 6dc75540..3f6de22 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn
@@ -30,6 +30,12 @@ tablegen("BPFGenMCPseudoLowering") {
td_file = "BPF.td"
}
+tablegen("BPFGenSDNodeInfo") {
+ visibility = [ ":LLVMBPFCodeGen" ]
+ args = [ "-gen-sd-node-info" ]
+ td_file = "BPF.td"
+}
+
tablegen("BPFGenRegisterBank") {
visibility = [ ":LLVMBPFCodeGen" ]
args = [ "-gen-register-bank" ]
@@ -43,6 +49,7 @@ static_library("LLVMBPFCodeGen") {
":BPFGenFastISel",
":BPFGenGlobalISel",
":BPFGenMCPseudoLowering",
+ ":BPFGenSDNodeInfo",
":BPFGenRegisterBank",
"MCTargetDesc",
"TargetInfo",