Diffstat (limited to 'llvm')
-rw-r--r-- llvm/benchmarks/SpecialCaseListBM.cpp | 29
-rw-r--r-- llvm/include/llvm/ADT/APFloat.h | 152
-rw-r--r-- llvm/include/llvm/ADT/Bitfields.h | 9
-rw-r--r-- llvm/include/llvm/ADT/DenseMap.h | 2
-rw-r--r-- llvm/include/llvm/ADT/DepthFirstIterator.h | 18
-rw-r--r-- llvm/include/llvm/ADT/ImmutableSet.h | 6
-rw-r--r-- llvm/include/llvm/ADT/PostOrderIterator.h | 6
-rw-r--r-- llvm/include/llvm/ADT/STLExtras.h | 2
-rw-r--r-- llvm/include/llvm/ADT/STLForwardCompat.h | 48
-rw-r--r-- llvm/include/llvm/ADT/SmallPtrSet.h | 12
-rw-r--r-- llvm/include/llvm/ADT/bit.h | 42
-rw-r--r-- llvm/include/llvm/Analysis/ScalarEvolution.h | 1
-rw-r--r-- llvm/include/llvm/Support/Alignment.h | 2
-rw-r--r-- llvm/include/llvm/Support/Casting.h | 7
-rw-r--r-- llvm/include/llvm/Support/CommandLine.h | 2
-rw-r--r-- llvm/include/llvm/Support/DOTGraphTraits.h | 5
-rw-r--r-- llvm/include/llvm/Support/ELFAttributes.h | 2
-rw-r--r-- llvm/include/llvm/Support/LSP/Protocol.h | 2
-rw-r--r-- llvm/include/llvm/Support/MD5.h | 2
-rw-r--r-- llvm/include/llvm/Support/MathExtras.h | 53
-rw-r--r-- llvm/include/llvm/Support/Timer.h | 2
-rw-r--r-- llvm/lib/Analysis/LazyValueInfo.cpp | 10
-rw-r--r-- llvm/lib/Analysis/ScalarEvolution.cpp | 53
-rw-r--r-- llvm/lib/Support/APFloat.cpp | 538
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 9
-rw-r--r-- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 3
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 3
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 22
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 675
-rw-r--r-- llvm/lib/Target/RISCV/RISCVSubtarget.h | 5
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 14
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 42
-rw-r--r-- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 186
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 107
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.h | 12
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 8
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 9
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUtils.h | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/abs_i32.ll | 92
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll | 186
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll | 607
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll | 294
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll | 553
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll | 553
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll | 571
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll | 258
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll | 571
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll | 553
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll | 553
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll | 607
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll | 88
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll | 161
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll | 216
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll | 226
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll | 270
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll | 270
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll | 288
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll | 288
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll | 553
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll | 553
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll | 553
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll | 553
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll | 282
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll | 264
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll | 282
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll | 571
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll | 571
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll | 571
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll | 288
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll | 294
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll | 559
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll | 519
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll | 773
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll | 264
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll | 264
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll | 506
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll | 519
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll | 506
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll | 506
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll | 519
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll | 773
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll | 496
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll | 496
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll | 496
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll | 496
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll | 496
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll | 496
-rw-r--r-- llvm/test/CodeGen/X86/combine-umax.ll | 2
-rw-r--r-- llvm/test/CodeGen/X86/combine-umin.ll | 2
-rw-r--r-- llvm/test/CodeGen/X86/vector-compress.ll | 70
-rw-r--r-- llvm/test/CodeGen/X86/vselect-avx.ll | 16
-rw-r--r-- llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll | 15
-rw-r--r-- llvm/test/Transforms/InstCombine/scmp.ll | 261
-rw-r--r-- llvm/test/Transforms/SCCP/constant-range-struct.ll | 65
-rw-r--r-- llvm/test/tools/llvm-reduce/reduce-instructions-alloca.ll | 16
-rw-r--r-- llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp | 6
-rw-r--r-- llvm/unittests/ADT/BitTest.cpp | 16
98 files changed, 23148 insertions, 649 deletions
diff --git a/llvm/benchmarks/SpecialCaseListBM.cpp b/llvm/benchmarks/SpecialCaseListBM.cpp
index 00aa3cd..b5d8268 100644
--- a/llvm/benchmarks/SpecialCaseListBM.cpp
+++ b/llvm/benchmarks/SpecialCaseListBM.cpp
@@ -110,6 +110,26 @@ std::string genGlobAtBothSides(const std::vector<std::string> &Files) {
return S;
}
+std::string genGlobAtBothSidesAndMid(const std::vector<std::string> &Files) {
+ std::string S;
+ std::minstd_rand Rng(RNG_SEED);
+ for (std::string F : Files) {
+ std::uniform_int_distribution<> PosDistrib(0, F.size() - 1);
+ F[PosDistrib(Rng)] = '*';
+
+ std::uniform_int_distribution<> Ends(0, 1);
+ if (Ends(Rng)) {
+ F.back() = '*';
+ F.front() = '*';
+ }
+
+ S += "src:";
+ S += F;
+ S += "\n";
+ }
+ return S;
+}
+
void BM_Make_(
benchmark::State &state,
std::string (*GenerateCaseList)(const std::vector<std::string> &Files)) {
@@ -171,6 +191,9 @@ BENCHMARK_CAPTURE(BM_Make_, Mid__, genGlobInMid)
BENCHMARK_CAPTURE(BM_Make_, Both_, genGlobAtBothSides)
->RangeMultiplier(MAX_LIST_MUL)
->Range(MAX_LIST_MIN, MAX_LIST_MAX);
+BENCHMARK_CAPTURE(BM_Make_, Mix__, genGlobAtBothSidesAndMid)
+ ->RangeMultiplier(MAX_LIST_MUL)
+ ->Range(MAX_LIST_MIN, MAX_LIST_MAX);
BENCHMARK_CAPTURE(BM_True_, None_, genGlobNone)
->RangeMultiplier(MAX_LIST_MUL)
@@ -187,6 +210,9 @@ BENCHMARK_CAPTURE(BM_True_, Mid__, genGlobInMid)
BENCHMARK_CAPTURE(BM_True_, Both_, genGlobAtBothSides)
->RangeMultiplier(MAX_LIST_MUL)
->Range(MAX_LIST_MIN, MAX_LIST_MAX);
+BENCHMARK_CAPTURE(BM_True_, Mix__, genGlobAtBothSidesAndMid)
+ ->RangeMultiplier(MAX_LIST_MUL)
+ ->Range(MAX_LIST_MIN, MAX_LIST_MAX);
BENCHMARK_CAPTURE(BM_False, None_, genGlobNone)
->RangeMultiplier(MAX_LIST_MUL)
@@ -203,5 +229,8 @@ BENCHMARK_CAPTURE(BM_False, Mid__, genGlobInMid)
BENCHMARK_CAPTURE(BM_False, Both_, genGlobAtBothSides)
->RangeMultiplier(MAX_LIST_MUL)
->Range(MAX_LIST_MIN, MAX_LIST_MAX);
+BENCHMARK_CAPTURE(BM_False, Mix__, genGlobAtBothSidesAndMid)
+ ->RangeMultiplier(MAX_LIST_MUL)
+ ->Range(MAX_LIST_MIN, MAX_LIST_MAX);
BENCHMARK_MAIN();
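For intuition, genGlobAtBothSidesAndMid always replaces one character of the
file name at a random position with '*' and, with probability 1/2, also
replaces the first and last characters. Illustrative entries for a
hypothetical input "path/to/file.cc" (actual output depends on RNG_SEED):

    src:path/to/fi*e.cc
    src:*ath/to/f*le.c*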
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index a1bfce7..bccdb89 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -138,10 +138,16 @@ enum lostFraction { // Example of truncated bits:
/// New operations: sqrt, IEEE remainder, C90 fmod, nexttoward.
///
+namespace detail {
+class IEEEFloat;
+class DoubleAPFloat;
+} // namespace detail
+
// These are the common type definitions shared by APFloat and its internal
// implementation classes. This struct should not define any non-static data
// members.
-struct APFloatBase {
+class APFloatBase {
+public:
typedef APInt::WordType integerPart;
static constexpr unsigned integerPartWidth = APInt::APINT_BITS_PER_WORD;
@@ -257,30 +263,64 @@ struct APFloatBase {
LLVM_ABI static const llvm::fltSemantics &EnumToSemantics(Semantics S);
LLVM_ABI static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem);
- LLVM_ABI static const fltSemantics &IEEEhalf() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &BFloat() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &IEEEsingle() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &IEEEdouble() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &IEEEquad() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &PPCDoubleDouble() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &PPCDoubleDoubleLegacy() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float8E5M2() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float8E5M2FNUZ() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float8E4M3() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float8E4M3FN() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float8E4M3FNUZ() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float8E3M4() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &FloatTF32() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float8E8M0FNU() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float6E3M2FN() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float6E2M3FN() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &Float4E2M1FN() LLVM_READNONE;
- LLVM_ABI static const fltSemantics &x87DoubleExtended() LLVM_READNONE;
+private:
+ LLVM_ABI static const fltSemantics semIEEEhalf;
+ LLVM_ABI static const fltSemantics semBFloat;
+ LLVM_ABI static const fltSemantics semIEEEsingle;
+ LLVM_ABI static const fltSemantics semIEEEdouble;
+ LLVM_ABI static const fltSemantics semIEEEquad;
+ LLVM_ABI static const fltSemantics semFloat8E5M2;
+ LLVM_ABI static const fltSemantics semFloat8E5M2FNUZ;
+ LLVM_ABI static const fltSemantics semFloat8E4M3;
+ LLVM_ABI static const fltSemantics semFloat8E4M3FN;
+ LLVM_ABI static const fltSemantics semFloat8E4M3FNUZ;
+ LLVM_ABI static const fltSemantics semFloat8E4M3B11FNUZ;
+ LLVM_ABI static const fltSemantics semFloat8E3M4;
+ LLVM_ABI static const fltSemantics semFloatTF32;
+ LLVM_ABI static const fltSemantics semFloat8E8M0FNU;
+ LLVM_ABI static const fltSemantics semFloat6E3M2FN;
+ LLVM_ABI static const fltSemantics semFloat6E2M3FN;
+ LLVM_ABI static const fltSemantics semFloat4E2M1FN;
+ LLVM_ABI static const fltSemantics semX87DoubleExtended;
+ LLVM_ABI static const fltSemantics semBogus;
+ LLVM_ABI static const fltSemantics semPPCDoubleDouble;
+ LLVM_ABI static const fltSemantics semPPCDoubleDoubleLegacy;
+
+ friend class detail::IEEEFloat;
+ friend class detail::DoubleAPFloat;
+ friend class APFloat;
+
+public:
+ static const fltSemantics &IEEEhalf() { return semIEEEhalf; }
+ static const fltSemantics &BFloat() { return semBFloat; }
+ static const fltSemantics &IEEEsingle() { return semIEEEsingle; }
+ static const fltSemantics &IEEEdouble() { return semIEEEdouble; }
+ static const fltSemantics &IEEEquad() { return semIEEEquad; }
+ static const fltSemantics &PPCDoubleDouble() { return semPPCDoubleDouble; }
+ static const fltSemantics &PPCDoubleDoubleLegacy() {
+ return semPPCDoubleDoubleLegacy;
+ }
+ static const fltSemantics &Float8E5M2() { return semFloat8E5M2; }
+ static const fltSemantics &Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
+ static const fltSemantics &Float8E4M3() { return semFloat8E4M3; }
+ static const fltSemantics &Float8E4M3FN() { return semFloat8E4M3FN; }
+ static const fltSemantics &Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
+ static const fltSemantics &Float8E4M3B11FNUZ() {
+ return semFloat8E4M3B11FNUZ;
+ }
+ static const fltSemantics &Float8E3M4() { return semFloat8E3M4; }
+ static const fltSemantics &FloatTF32() { return semFloatTF32; }
+ static const fltSemantics &Float8E8M0FNU() { return semFloat8E8M0FNU; }
+ static const fltSemantics &Float6E3M2FN() { return semFloat6E3M2FN; }
+ static const fltSemantics &Float6E2M3FN() { return semFloat6E2M3FN; }
+ static const fltSemantics &Float4E2M1FN() { return semFloat4E2M1FN; }
+ static const fltSemantics &x87DoubleExtended() {
+ return semX87DoubleExtended;
+ }
/// A pseudo fltSemantics used to construct APFloats that cannot conflict with
/// anything real.
- LLVM_ABI static const fltSemantics &Bogus() LLVM_READNONE;
+ static const fltSemantics &Bogus() { return semBogus; }
// Returns true if any number described by this semantics can be precisely
// represented by the specified semantics. Does not take into account
@@ -927,69 +967,11 @@ class APFloat : public APFloatBase {
llvm_unreachable("Unexpected semantics");
}
- ~Storage() {
- if (usesLayout<IEEEFloat>(*semantics)) {
- IEEE.~IEEEFloat();
- return;
- }
- if (usesLayout<DoubleAPFloat>(*semantics)) {
- Double.~DoubleAPFloat();
- return;
- }
- llvm_unreachable("Unexpected semantics");
- }
-
- Storage(const Storage &RHS) {
- if (usesLayout<IEEEFloat>(*RHS.semantics)) {
- new (this) IEEEFloat(RHS.IEEE);
- return;
- }
- if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
- new (this) DoubleAPFloat(RHS.Double);
- return;
- }
- llvm_unreachable("Unexpected semantics");
- }
-
- Storage(Storage &&RHS) {
- if (usesLayout<IEEEFloat>(*RHS.semantics)) {
- new (this) IEEEFloat(std::move(RHS.IEEE));
- return;
- }
- if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
- new (this) DoubleAPFloat(std::move(RHS.Double));
- return;
- }
- llvm_unreachable("Unexpected semantics");
- }
-
- Storage &operator=(const Storage &RHS) {
- if (usesLayout<IEEEFloat>(*semantics) &&
- usesLayout<IEEEFloat>(*RHS.semantics)) {
- IEEE = RHS.IEEE;
- } else if (usesLayout<DoubleAPFloat>(*semantics) &&
- usesLayout<DoubleAPFloat>(*RHS.semantics)) {
- Double = RHS.Double;
- } else if (this != &RHS) {
- this->~Storage();
- new (this) Storage(RHS);
- }
- return *this;
- }
-
- Storage &operator=(Storage &&RHS) {
- if (usesLayout<IEEEFloat>(*semantics) &&
- usesLayout<IEEEFloat>(*RHS.semantics)) {
- IEEE = std::move(RHS.IEEE);
- } else if (usesLayout<DoubleAPFloat>(*semantics) &&
- usesLayout<DoubleAPFloat>(*RHS.semantics)) {
- Double = std::move(RHS.Double);
- } else if (this != &RHS) {
- this->~Storage();
- new (this) Storage(std::move(RHS));
- }
- return *this;
- }
+ LLVM_ABI ~Storage();
+ LLVM_ABI Storage(const Storage &RHS);
+ LLVM_ABI Storage(Storage &&RHS);
+ LLVM_ABI Storage &operator=(const Storage &RHS);
+ LLVM_ABI Storage &operator=(Storage &&RHS);
} U;
template <typename T> static bool usesLayout(const fltSemantics &Semantics) {
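The APFloat.h refactor inverts the old layout: the fltSemantics objects become
private static members of APFloatBase, the former out-of-line accessor
functions turn into trivially inlinable getters, and the Storage special
members move behind LLVM_ABI into the .cpp file. A minimal sketch of the
accessor pattern, with hypothetical names standing in for the real ones:

    struct fltSem { int maxExp, minExp; };  // stand-in for fltSemantics
    class FloatBase {
      static const fltSem semHalf;          // declared here, defined in one .cpp
      friend class FloatImpl;               // impl classes may name semHalf directly
    public:
      static const fltSem &Half() { return semHalf; }  // inlines in headers
    };
    class FloatImpl { /* may reference FloatBase::semHalf */ };
    // In the .cpp (constexpr implies const, so this matches the declaration):
    constexpr fltSem FloatBase::semHalf = {15, -14};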
diff --git a/llvm/include/llvm/ADT/Bitfields.h b/llvm/include/llvm/ADT/Bitfields.h
index 1af2761..1fbc41c 100644
--- a/llvm/include/llvm/ADT/Bitfields.h
+++ b/llvm/include/llvm/ADT/Bitfields.h
@@ -154,12 +154,9 @@ struct ResolveUnderlyingType {
using type = std::underlying_type_t<T>;
};
template <typename T> struct ResolveUnderlyingType<T, false> {
- using type = T;
-};
-template <> struct ResolveUnderlyingType<bool, false> {
- /// In case sizeof(bool) != 1, replace `void` by an additionnal
- /// std::conditional.
- using type = std::conditional_t<sizeof(bool) == 1, uint8_t, void>;
+ static_assert(!std::is_same_v<T, bool> || sizeof(bool) == 1,
+ "T being bool requires sizeof(bool) == 1.");
+ using type = std::conditional_t<std::is_same_v<T, bool>, uint8_t, T>;
};
} // namespace bitfields_details
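The merged Bitfields.h trait maps bool to uint8_t and leaves every other
non-enum type unchanged, with the static_assert covering the theoretical
sizeof(bool) != 1 platform that the deleted std::conditional_t guarded
against. A compile-time check of the mapping (a sketch, assuming the
llvm/ADT/Bitfields.h include):

    #include "llvm/ADT/Bitfields.h"
    #include <cstdint>
    #include <type_traits>
    using llvm::bitfields_details::ResolveUnderlyingType;
    static_assert(std::is_same_v<ResolveUnderlyingType<bool, false>::type, uint8_t>);
    static_assert(std::is_same_v<ResolveUnderlyingType<unsigned, false>::type, unsigned>);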
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index 4bda50f..25b5262 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -42,7 +42,7 @@ namespace detail {
// We extend a pair to allow users to override the bucket type with their own
// implementation without requiring two members.
template <typename KeyT, typename ValueT>
-struct DenseMapPair : public std::pair<KeyT, ValueT> {
+struct DenseMapPair : std::pair<KeyT, ValueT> {
using std::pair<KeyT, ValueT>::pair;
KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; }
diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h
index 4ced758..3c54f32 100644
--- a/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -66,8 +66,8 @@ public:
// one more method, completed, which is invoked when all children of a
// node have been processed. It is intended to distinguish back and
// cross edges in the spanning tree but is not used in the common case.
-template <typename NodeRef, unsigned SmallSize=8>
-struct df_iterator_default_set : public SmallPtrSet<NodeRef, SmallSize> {
+template <typename NodeRef, unsigned SmallSize = 8>
+struct df_iterator_default_set : SmallPtrSet<NodeRef, SmallSize> {
using BaseSet = SmallPtrSet<NodeRef, SmallSize>;
using iterator = typename BaseSet::iterator;
@@ -235,8 +235,10 @@ iterator_range<df_iterator<T>> depth_first(const T& G) {
}
// Provide global definitions of external depth first iterators...
-template <class T, class SetTy = df_iterator_default_set<typename GraphTraits<T>::NodeRef>>
-struct df_ext_iterator : public df_iterator<T, SetTy, true> {
+template <class T,
+ class SetTy =
+ df_iterator_default_set<typename GraphTraits<T>::NodeRef>>
+struct df_ext_iterator : df_iterator<T, SetTy, true> {
df_ext_iterator(const df_iterator<T, SetTy, true> &V)
: df_iterator<T, SetTy, true>(V) {}
};
@@ -262,7 +264,7 @@ template <class T,
class SetTy =
df_iterator_default_set<typename GraphTraits<T>::NodeRef>,
bool External = false>
-struct idf_iterator : public df_iterator<Inverse<T>, SetTy, External> {
+struct idf_iterator : df_iterator<Inverse<T>, SetTy, External> {
idf_iterator(const df_iterator<Inverse<T>, SetTy, External> &V)
: df_iterator<Inverse<T>, SetTy, External>(V) {}
};
@@ -284,8 +286,10 @@ iterator_range<idf_iterator<T>> inverse_depth_first(const T& G) {
}
// Provide global definitions of external inverse depth first iterators...
-template <class T, class SetTy = df_iterator_default_set<typename GraphTraits<T>::NodeRef>>
-struct idf_ext_iterator : public idf_iterator<T, SetTy, true> {
+template <class T,
+ class SetTy =
+ df_iterator_default_set<typename GraphTraits<T>::NodeRef>>
+struct idf_ext_iterator : idf_iterator<T, SetTy, true> {
idf_ext_iterator(const idf_iterator<T, SetTy, true> &V)
: idf_iterator<T, SetTy, true>(V) {}
idf_ext_iterator(const df_iterator<Inverse<T>, SetTy, true> &V)
diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h
index 310539f..8b2425e 100644
--- a/llvm/include/llvm/ADT/ImmutableSet.h
+++ b/llvm/include/llvm/ADT/ImmutableSet.h
@@ -931,8 +931,7 @@ struct ImutProfileInfo<T*> {
/// ImutContainerInfo - Generic definition of comparison operations for
/// elements of immutable containers that defaults to using
/// std::equal_to<> and std::less<> to perform comparison of elements.
-template <typename T>
-struct ImutContainerInfo : public ImutProfileInfo<T> {
+template <typename T> struct ImutContainerInfo : ImutProfileInfo<T> {
using value_type = typename ImutProfileInfo<T>::value_type;
using value_type_ref = typename ImutProfileInfo<T>::value_type_ref;
using key_type = value_type;
@@ -957,8 +956,7 @@ struct ImutContainerInfo : public ImutProfileInfo<T> {
/// ImutContainerInfo - Specialization for pointer values to treat pointers
/// as references to unique objects. Pointers are thus compared by
/// their addresses.
-template <typename T>
-struct ImutContainerInfo<T*> : public ImutProfileInfo<T*> {
+template <typename T> struct ImutContainerInfo<T *> : ImutProfileInfo<T *> {
using value_type = typename ImutProfileInfo<T*>::value_type;
using value_type_ref = typename ImutProfileInfo<T*>::value_type_ref;
using key_type = value_type;
diff --git a/llvm/include/llvm/ADT/PostOrderIterator.h b/llvm/include/llvm/ADT/PostOrderIterator.h
index 1cbd3c1..d9aa452 100644
--- a/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -200,7 +200,7 @@ template <class T> iterator_range<po_iterator<T>> post_order(const T &G) {
// Provide global definitions of external postorder iterators...
template <class T, class SetType = std::set<typename GraphTraits<T>::NodeRef>>
-struct po_ext_iterator : public po_iterator<T, SetType, true> {
+struct po_ext_iterator : po_iterator<T, SetType, true> {
po_ext_iterator(const po_iterator<T, SetType, true> &V) :
po_iterator<T, SetType, true>(V) {}
};
@@ -223,7 +223,7 @@ iterator_range<po_ext_iterator<T, SetType>> post_order_ext(const T &G, SetType &
// Provide global definitions of inverse post order iterators...
template <class T, class SetType = std::set<typename GraphTraits<T>::NodeRef>,
bool External = false>
-struct ipo_iterator : public po_iterator<Inverse<T>, SetType, External> {
+struct ipo_iterator : po_iterator<Inverse<T>, SetType, External> {
ipo_iterator(const po_iterator<Inverse<T>, SetType, External> &V) :
po_iterator<Inverse<T>, SetType, External> (V) {}
};
@@ -245,7 +245,7 @@ iterator_range<ipo_iterator<T>> inverse_post_order(const T &G) {
// Provide global definitions of external inverse postorder iterators...
template <class T, class SetType = std::set<typename GraphTraits<T>::NodeRef>>
-struct ipo_ext_iterator : public ipo_iterator<T, SetType, true> {
+struct ipo_ext_iterator : ipo_iterator<T, SetType, true> {
ipo_ext_iterator(const ipo_iterator<T, SetType, true> &V) :
ipo_iterator<T, SetType, true>(V) {}
ipo_ext_iterator(const po_iterator<Inverse<T>, SetType, true> &V) :
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 658f262..a9841c6 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -674,7 +674,7 @@ using zip_traits = iterator_facade_base<
ReferenceTupleType *, ReferenceTupleType>;
template <typename ZipType, typename ReferenceTupleType, typename... Iters>
-struct zip_common : public zip_traits<ZipType, ReferenceTupleType, Iters...> {
+struct zip_common : zip_traits<ZipType, ReferenceTupleType, Iters...> {
using Base = zip_traits<ZipType, ReferenceTupleType, Iters...>;
using IndexSequence = std::index_sequence_for<Iters...>;
using value_type = typename Base::value_type;
diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h
index da9d3ab0..273a5cf 100644
--- a/llvm/include/llvm/ADT/STLForwardCompat.h
+++ b/llvm/include/llvm/ADT/STLForwardCompat.h
@@ -26,6 +26,54 @@ namespace llvm {
// Features from C++20
//===----------------------------------------------------------------------===//
+namespace numbers {
+// clang-format off
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T e_v = T(0x1.5bf0a8b145769P+1); // (2.7182818284590452354) https://oeis.org/A001113
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T egamma_v = T(0x1.2788cfc6fb619P-1); // (.57721566490153286061) https://oeis.org/A001620
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T ln2_v = T(0x1.62e42fefa39efP-1); // (.69314718055994530942) https://oeis.org/A002162
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T ln10_v = T(0x1.26bb1bbb55516P+1); // (2.3025850929940456840) https://oeis.org/A002392
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T log2e_v = T(0x1.71547652b82feP+0); // (1.4426950408889634074)
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T log10e_v = T(0x1.bcb7b1526e50eP-2); // (.43429448190325182765)
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T pi_v = T(0x1.921fb54442d18P+1); // (3.1415926535897932385) https://oeis.org/A000796
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T inv_pi_v = T(0x1.45f306dc9c883P-2); // (.31830988618379067154) https://oeis.org/A049541
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T inv_sqrtpi_v = T(0x1.20dd750429b6dP-1); // (.56418958354775628695) https://oeis.org/A087197
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T sqrt2_v = T(0x1.6a09e667f3bcdP+0); // (1.4142135623730950488) https://oeis.org/A002193
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T inv_sqrt2_v = T(0x1.6a09e667f3bcdP-1); // (.70710678118654752440)
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T sqrt3_v = T(0x1.bb67ae8584caaP+0); // (1.7320508075688772935) https://oeis.org/A002194
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T inv_sqrt3_v = T(0x1.279a74590331cP-1); // (.57735026918962576451)
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T phi_v = T(0x1.9e3779b97f4a8P+0); // (1.6180339887498948482) https://oeis.org/A001622
+
+inline constexpr double e = e_v<double>;
+inline constexpr double egamma = egamma_v<double>;
+inline constexpr double ln2 = ln2_v<double>;
+inline constexpr double ln10 = ln10_v<double>;
+inline constexpr double log2e = log2e_v<double>;
+inline constexpr double log10e = log10e_v<double>;
+inline constexpr double pi = pi_v<double>;
+inline constexpr double inv_pi = inv_pi_v<double>;
+inline constexpr double inv_sqrtpi = inv_sqrtpi_v<double>;
+inline constexpr double sqrt2 = sqrt2_v<double>;
+inline constexpr double inv_sqrt2 = inv_sqrt2_v<double>;
+inline constexpr double sqrt3 = sqrt3_v<double>;
+inline constexpr double inv_sqrt3 = inv_sqrt3_v<double>;
+inline constexpr double phi = phi_v<double>;
+// clang-format on
+} // namespace numbers
+
template <typename T>
struct remove_cvref // NOLINT(readability-identifier-naming)
{
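The new llvm::numbers variable templates mirror C++20 std::numbers, and the
double-typed constants keep the existing spellings working so that
MathExtras.h (below) can forward its float variants to them. Expected usage,
based only on the declarations added above:

    #include "llvm/ADT/STLForwardCompat.h"
    constexpr float Tau = 2.0f * llvm::numbers::pi_v<float>;
    constexpr double InvE = 1.0 / llvm::numbers::e;  // e is e_v<double>
    static_assert(llvm::numbers::pi == llvm::numbers::pi_v<double>);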
diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h
index f588a77..8e7c8b3 100644
--- a/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -532,18 +532,8 @@ class SmallPtrSet : public SmallPtrSetImpl<PtrType> {
using BaseT = SmallPtrSetImpl<PtrType>;
- // A constexpr version of llvm::bit_ceil.
- // TODO: Replace this with std::bit_ceil once C++20 is available.
- static constexpr size_t RoundUpToPowerOfTwo(size_t X) {
- size_t C = 1;
- size_t CMax = C << (std::numeric_limits<size_t>::digits - 1);
- while (C < X && C < CMax)
- C <<= 1;
- return C;
- }
-
// Make sure that SmallSize is a power of two, round up if not.
- static constexpr size_t SmallSizePowTwo = RoundUpToPowerOfTwo(SmallSize);
+ static constexpr size_t SmallSizePowTwo = llvm::bit_ceil_constexpr(SmallSize);
/// SmallStorage - Fixed size storage used in 'small mode'.
const void *SmallStorage[SmallSizePowTwo];
diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h
index 66c4f94..8b60b69 100644
--- a/llvm/include/llvm/ADT/bit.h
+++ b/llvm/include/llvm/ADT/bit.h
@@ -336,34 +336,44 @@ template <typename T> [[nodiscard]] T bit_ceil(T Value) {
return T(1) << llvm::bit_width<T>(Value - 1u);
}
-// Forward-declare rotr so that rotl can use it.
-template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
-[[nodiscard]] constexpr T rotr(T V, int R);
+/// Returns the smallest integral power of two no smaller than Value if Value is
+/// nonzero. Returns 1 otherwise.
+///
+/// Ex. bit_ceil(5) == 8.
+///
+/// The return value is undefined if the input is larger than the largest power
+/// of two representable in T.
+template <typename T> [[nodiscard]] constexpr T bit_ceil_constexpr(T Value) {
+ static_assert(std::is_unsigned_v<T>,
+ "Only unsigned integral types are allowed.");
+ if (Value < 2)
+ return 1;
+ return T(1) << llvm::bit_width_constexpr<T>(Value - 1u);
+}
template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
[[nodiscard]] constexpr T rotl(T V, int R) {
- unsigned N = std::numeric_limits<T>::digits;
+ constexpr unsigned N = std::numeric_limits<T>::digits;
- R = R % N;
- if (!R)
- return V;
+ static_assert(has_single_bit(N), "& (N - 1) is only valid for powers of two");
+ R = R & (N - 1);
- if (R < 0)
- return llvm::rotr(V, -R);
+ if (R == 0)
+ return V;
return (V << R) | (V >> (N - R));
}
-template <typename T, typename> [[nodiscard]] constexpr T rotr(T V, int R) {
- unsigned N = std::numeric_limits<T>::digits;
+template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
+[[nodiscard]] constexpr T rotr(T V, int R) {
+ constexpr unsigned N = std::numeric_limits<T>::digits;
+
+ static_assert(has_single_bit(N), "& (N - 1) is only valid for powers of two");
+ R = R & (N - 1);
- R = R % N;
- if (!R)
+ if (R == 0)
return V;
- if (R < 0)
- return llvm::rotl(V, -R);
-
return (V >> R) | (V << (N - R));
}
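The reworked rotl/rotr drop the forward declaration and mutual delegation
that handled negative counts: because the bit width N of an unsigned type is
a power of two, masking with N - 1 reduces any count, negative ones included,
to the range [0, N). bit_ceil_constexpr is the constant-evaluable companion
that the SmallPtrSet.h change above relies on. A few compile-time checks (a
sketch, assuming the llvm/ADT/bit.h include):

    #include "llvm/ADT/bit.h"
    #include <cstdint>
    static_assert(llvm::rotl<uint8_t>(0x01, 9) == 0x02);   // 9 & 7 = 1
    static_assert(llvm::rotl<uint8_t>(0x01, -1) == 0x80);  // -1 & 7 = 7, i.e. rotr by 1
    static_assert(llvm::rotr<uint8_t>(0x01, 1) == 0x80);
    static_assert(llvm::bit_ceil_constexpr(5u) == 8u);
    static_assert(llvm::bit_ceil_constexpr(0u) == 1u);     // zero maps to 1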
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index e5a6c8c..3d3ec14 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1345,6 +1345,7 @@ public:
class LoopGuards {
DenseMap<const SCEV *, const SCEV *> RewriteMap;
+ SmallDenseSet<std::pair<const SCEV *, const SCEV *>> NotEqual;
bool PreserveNUW = false;
bool PreserveNSW = false;
ScalarEvolution &SE;
diff --git a/llvm/include/llvm/Support/Alignment.h b/llvm/include/llvm/Support/Alignment.h
index a4ca54e..f9d7c76 100644
--- a/llvm/include/llvm/Support/Alignment.h
+++ b/llvm/include/llvm/Support/Alignment.h
@@ -103,7 +103,7 @@ inline Align assumeAligned(uint64_t Value) {
/// This struct is a compact representation of a valid (power of two) or
/// undefined (0) alignment.
-struct MaybeAlign : public std::optional<Align> {
+struct MaybeAlign : std::optional<Align> {
private:
using UP = std::optional<Align>;
diff --git a/llvm/include/llvm/Support/Casting.h b/llvm/include/llvm/Support/Casting.h
index 2a9a149..6f6df2e 100644
--- a/llvm/include/llvm/Support/Casting.h
+++ b/llvm/include/llvm/Support/Casting.h
@@ -340,7 +340,7 @@ struct ValueFromPointerCast
/// during the cast. It's also a good example of how to implement a move-only
/// cast.
template <typename To, typename From, typename Derived = void>
-struct UniquePtrCast : public CastIsPossible<To, From *> {
+struct UniquePtrCast : CastIsPossible<To, From *> {
using Self = detail::SelfType<Derived, UniquePtrCast<To, From>>;
using CastResultType = std::unique_ptr<
std::remove_reference_t<typename cast_retty<To, From>::ret_type>>;
@@ -473,7 +473,7 @@ struct ForwardToPointerCast {
// take advantage of the cast traits whenever possible!
template <typename To, typename From, typename Enable = void>
-struct CastInfo : public CastIsPossible<To, From> {
+struct CastInfo : CastIsPossible<To, From> {
using Self = CastInfo<To, From, Enable>;
using CastReturnType = typename cast_retty<To, From>::ret_type;
@@ -536,8 +536,7 @@ struct CastInfo<To, std::unique_ptr<From>> : public UniquePtrCast<To, From> {};
/// the input is std::optional<From> that the output can be std::optional<To>.
/// If that's not the case, specialize CastInfo for your use case.
template <typename To, typename From>
-struct CastInfo<To, std::optional<From>> : public OptionalValueCast<To, From> {
-};
+struct CastInfo<To, std::optional<From>> : OptionalValueCast<To, From> {};
/// isa<X> - Return true if the parameter to the template is an instance of one
/// of the template type arguments. Used like this:
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index dd05c53..5a5f00e 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -549,7 +549,7 @@ template <class DataType> struct OptionValue;
// The default value safely does nothing. Option value printing is only
// best-effort.
template <class DataType, bool isClass>
-struct OptionValueBase : public GenericOptionValue {
+struct OptionValueBase : GenericOptionValue {
// Temporary storage for argument passing.
using WrapperType = OptionValue<DataType>;
diff --git a/llvm/include/llvm/Support/DOTGraphTraits.h b/llvm/include/llvm/Support/DOTGraphTraits.h
index ffa9abe..3b9fe00 100644
--- a/llvm/include/llvm/Support/DOTGraphTraits.h
+++ b/llvm/include/llvm/Support/DOTGraphTraits.h
@@ -162,9 +162,8 @@ public:
/// graphs are converted to 'dot' graphs. When specializing, you may inherit
/// from DefaultDOTGraphTraits if you don't need to override everything.
///
-template <typename Ty>
-struct DOTGraphTraits : public DefaultDOTGraphTraits {
- DOTGraphTraits (bool simple=false) : DefaultDOTGraphTraits (simple) {}
+template <typename Ty> struct DOTGraphTraits : DefaultDOTGraphTraits {
+ using DefaultDOTGraphTraits::DefaultDOTGraphTraits;
};
} // End llvm namespace
diff --git a/llvm/include/llvm/Support/ELFAttributes.h b/llvm/include/llvm/Support/ELFAttributes.h
index 270246f..5771a84 100644
--- a/llvm/include/llvm/Support/ELFAttributes.h
+++ b/llvm/include/llvm/Support/ELFAttributes.h
@@ -48,8 +48,6 @@ struct SubsectionAndTagToTagName {
StringRef SubsectionName;
unsigned Tag;
StringRef TagName;
- SubsectionAndTagToTagName(StringRef SN, unsigned Tg, StringRef TN)
- : SubsectionName(SN), Tag(Tg), TagName(TN) {}
};
namespace ELFAttrs {
diff --git a/llvm/include/llvm/Support/LSP/Protocol.h b/llvm/include/llvm/Support/LSP/Protocol.h
index 93b82f1..e38203a 100644
--- a/llvm/include/llvm/Support/LSP/Protocol.h
+++ b/llvm/include/llvm/Support/LSP/Protocol.h
@@ -449,7 +449,7 @@ struct ReferenceContext {
bool fromJSON(const llvm::json::Value &value, ReferenceContext &result,
llvm::json::Path path);
-struct ReferenceParams : public TextDocumentPositionParams {
+struct ReferenceParams : TextDocumentPositionParams {
ReferenceContext context;
};
diff --git a/llvm/include/llvm/Support/MD5.h b/llvm/include/llvm/Support/MD5.h
index ed29826..4ba3867 100644
--- a/llvm/include/llvm/Support/MD5.h
+++ b/llvm/include/llvm/Support/MD5.h
@@ -41,7 +41,7 @@ template <typename T> class ArrayRef;
class MD5 {
public:
- struct MD5Result : public std::array<uint8_t, 16> {
+ struct MD5Result : std::array<uint8_t, 16> {
LLVM_ABI SmallString<32> digest() const;
uint64_t low() const {
diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index c2716a9..41232335 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -13,6 +13,7 @@
#ifndef LLVM_SUPPORT_MATHEXTRAS_H
#define LLVM_SUPPORT_MATHEXTRAS_H
+#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/bit.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
@@ -42,38 +43,28 @@ using common_sint =
/// Mathematical constants.
namespace numbers {
-// TODO: Track C++20 std::numbers.
// clang-format off
-constexpr double e = 0x1.5bf0a8b145769P+1, // (2.7182818284590452354) https://oeis.org/A001113
- egamma = 0x1.2788cfc6fb619P-1, // (.57721566490153286061) https://oeis.org/A001620
- ln2 = 0x1.62e42fefa39efP-1, // (.69314718055994530942) https://oeis.org/A002162
- ln10 = 0x1.26bb1bbb55516P+1, // (2.3025850929940456840) https://oeis.org/A002392
- log2e = 0x1.71547652b82feP+0, // (1.4426950408889634074)
- log10e = 0x1.bcb7b1526e50eP-2, // (.43429448190325182765)
- pi = 0x1.921fb54442d18P+1, // (3.1415926535897932385) https://oeis.org/A000796
- inv_pi = 0x1.45f306dc9c883P-2, // (.31830988618379067154) https://oeis.org/A049541
- sqrtpi = 0x1.c5bf891b4ef6bP+0, // (1.7724538509055160273) https://oeis.org/A002161
- inv_sqrtpi = 0x1.20dd750429b6dP-1, // (.56418958354775628695) https://oeis.org/A087197
- sqrt2 = 0x1.6a09e667f3bcdP+0, // (1.4142135623730950488) https://oeis.org/A00219
- inv_sqrt2 = 0x1.6a09e667f3bcdP-1, // (.70710678118654752440)
- sqrt3 = 0x1.bb67ae8584caaP+0, // (1.7320508075688772935) https://oeis.org/A002194
- inv_sqrt3 = 0x1.279a74590331cP-1, // (.57735026918962576451)
- phi = 0x1.9e3779b97f4a8P+0; // (1.6180339887498948482) https://oeis.org/A001622
-constexpr float ef = 0x1.5bf0a8P+1F, // (2.71828183) https://oeis.org/A001113
- egammaf = 0x1.2788d0P-1F, // (.577215665) https://oeis.org/A001620
- ln2f = 0x1.62e430P-1F, // (.693147181) https://oeis.org/A002162
- ln10f = 0x1.26bb1cP+1F, // (2.30258509) https://oeis.org/A002392
- log2ef = 0x1.715476P+0F, // (1.44269504)
- log10ef = 0x1.bcb7b2P-2F, // (.434294482)
- pif = 0x1.921fb6P+1F, // (3.14159265) https://oeis.org/A000796
- inv_pif = 0x1.45f306P-2F, // (.318309886) https://oeis.org/A049541
- sqrtpif = 0x1.c5bf8aP+0F, // (1.77245385) https://oeis.org/A002161
- inv_sqrtpif = 0x1.20dd76P-1F, // (.564189584) https://oeis.org/A087197
- sqrt2f = 0x1.6a09e6P+0F, // (1.41421356) https://oeis.org/A002193
- inv_sqrt2f = 0x1.6a09e6P-1F, // (.707106781)
- sqrt3f = 0x1.bb67aeP+0F, // (1.73205081) https://oeis.org/A002194
- inv_sqrt3f = 0x1.279a74P-1F, // (.577350269)
- phif = 0x1.9e377aP+0F; // (1.61803399) https://oeis.org/A001622
+inline constexpr float ef = e_v<float>;
+inline constexpr float egammaf = egamma_v<float>;
+inline constexpr float ln2f = ln2_v<float>;
+inline constexpr float ln10f = ln10_v<float>;
+inline constexpr float log2ef = log2e_v<float>;
+inline constexpr float log10ef = log10e_v<float>;
+inline constexpr float pif = pi_v<float>;
+inline constexpr float inv_pif = inv_pi_v<float>;
+inline constexpr float inv_sqrtpif = inv_sqrtpi_v<float>;
+inline constexpr float sqrt2f = sqrt2_v<float>;
+inline constexpr float inv_sqrt2f = inv_sqrt2_v<float>;
+inline constexpr float sqrt3f = sqrt3_v<float>;
+inline constexpr float inv_sqrt3f = inv_sqrt3_v<float>;
+inline constexpr float phif = phi_v<float>;
+
+// sqrtpi is not in C++20 std::numbers.
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>>
+inline constexpr T sqrtpi_v = T(0x1.c5bf891b4ef6bP+0); // (1.7724538509055160273) https://oeis.org/A002161
+inline constexpr double sqrtpi = sqrtpi_v<double>;
+inline constexpr float sqrtpif = sqrtpi_v<float>;
+
// These string literals are taken from below:
// https://github.com/bminor/glibc/blob/8543577b04ded6d979ffcc5a818930e4d74d0645/math/math.h#L1215-L1229
constexpr const char *pis = "3.141592653589793238462643383279502884",
diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h
index 40709d4..a4ed712 100644
--- a/llvm/include/llvm/Support/Timer.h
+++ b/llvm/include/llvm/Support/Timer.h
@@ -167,7 +167,7 @@ public:
/// you to declare a new timer, AND specify the region to time, all in one
/// statement. All timers with the same name are merged. This is primarily
/// used for debugging and for hunting performance problems.
-struct NamedRegionTimer : public TimeRegion {
+struct NamedRegionTimer : TimeRegion {
LLVM_ABI explicit NamedRegionTimer(StringRef Name, StringRef Description,
StringRef GroupName,
StringRef GroupDescription,
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 0e5bc48..df75999 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -947,9 +947,8 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) {
/*UseBlockValue*/ false));
}
- ValueLatticeElement Result = TrueVal;
- Result.mergeIn(FalseVal);
- return Result;
+ TrueVal.mergeIn(FalseVal);
+ return TrueVal;
}
std::optional<ConstantRange>
@@ -1778,9 +1777,8 @@ ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
assert(OptResult && "Value not available after solving");
}
- ValueLatticeElement Result = *OptResult;
- LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
- return Result;
+ LLVM_DEBUG(dbgs() << " Result = " << *OptResult << "\n");
+ return *OptResult;
}
ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index e06b095..425420f 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15740,19 +15740,26 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
GetNextSCEVDividesByDivisor(One, DividesBy);
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
} else {
+ // LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS),
+ // but creating the subtraction eagerly is expensive. Track the
+ // inequalities in a separate map, and materialize the rewrite lazily
+ // when encountering a suitable subtraction while rewriting.
if (LHS->getType()->isPointerTy()) {
LHS = SE.getLosslessPtrToIntExpr(LHS);
RHS = SE.getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
break;
}
- auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) {
- const SCEV *Sub = SE.getMinusSCEV(A, B);
- AddRewrite(Sub, Sub,
- SE.getUMaxExpr(Sub, SE.getOne(From->getType())));
- };
- AddSubRewrite(LHS, RHS);
- AddSubRewrite(RHS, LHS);
+ const SCEVConstant *C;
+ const SCEV *A, *B;
+ if (match(RHS, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) &&
+ match(LHS, m_scev_Add(m_scev_Specific(C), m_SCEV(B)))) {
+ RHS = A;
+ LHS = B;
+ }
+ if (LHS > RHS)
+ std::swap(LHS, RHS);
+ Guards.NotEqual.insert({LHS, RHS});
continue;
}
break;
@@ -15886,13 +15893,15 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
class SCEVLoopGuardRewriter
: public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
const DenseMap<const SCEV *, const SCEV *> &Map;
+ const SmallDenseSet<std::pair<const SCEV *, const SCEV *>> &NotEqual;
SCEV::NoWrapFlags FlagMask = SCEV::FlagAnyWrap;
public:
SCEVLoopGuardRewriter(ScalarEvolution &SE,
const ScalarEvolution::LoopGuards &Guards)
- : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap) {
+ : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap),
+ NotEqual(Guards.NotEqual) {
if (Guards.PreserveNUW)
FlagMask = ScalarEvolution::setFlags(FlagMask, SCEV::FlagNUW);
if (Guards.PreserveNSW)
@@ -15947,14 +15956,36 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ // Helper to check if S is a subtraction (A - B) where A != B, and if so,
+ // return UMax(S, 1).
+ auto RewriteSubtraction = [&](const SCEV *S) -> const SCEV * {
+ const SCEV *LHS, *RHS;
+ if (MatchBinarySub(S, LHS, RHS)) {
+ if (LHS > RHS)
+ std::swap(LHS, RHS);
+ if (NotEqual.contains({LHS, RHS}))
+ return SE.getUMaxExpr(S, SE.getOne(S->getType()));
+ }
+ return nullptr;
+ };
+
+ // Check if Expr itself is a subtraction pattern with guard info.
+ if (const SCEV *Rewritten = RewriteSubtraction(Expr))
+ return Rewritten;
+
// Trip count expressions sometimes consist of adding 3 operands, i.e.
// (Const + A + B). There may be guard info for A + B, and if so, apply
// it.
// TODO: Could more generally apply guards to Add sub-expressions.
if (isa<SCEVConstant>(Expr->getOperand(0)) &&
Expr->getNumOperands() == 3) {
- if (const SCEV *S = Map.lookup(
- SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
+ const SCEV *Add =
+ SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2));
+ if (const SCEV *Rewritten = RewriteSubtraction(Add))
+ return SE.getAddExpr(
+ Expr->getOperand(0), Rewritten,
+ ScalarEvolution::maskFlags(Expr->getNoWrapFlags(), FlagMask));
+ if (const SCEV *S = Map.lookup(Add))
return SE.getAddExpr(Expr->getOperand(0), S);
}
SmallVector<const SCEV *, 2> Operands;
@@ -15989,7 +16020,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
};
- if (RewriteMap.empty())
+ if (RewriteMap.empty() && NotEqual.empty())
return Expr;
SCEVLoopGuardRewriter Rewriter(SE, *this);
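Concretely: a dominating guard a != b previously triggered two eager
getMinusSCEV calls, registering rewrites for both a - b and b - a at
collection time. With this change, collectFromBlock records only the ordered
pair {a, b} in NotEqual (after stripping a constant addend common to both
sides), and SCEVLoopGuardRewriter materializes umax(a - b, 1) only when it
actually visits an add expression matching that subtraction. The early exit
in rewrite() is extended accordingly, so guards consisting solely of
inequalities still take effect.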
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 8623c06..b4de79a 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -130,44 +130,46 @@ struct fltSemantics {
bool hasSignBitInMSB = true;
};
-static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
-static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
-static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
-static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
-static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
-static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
-static constexpr fltSemantics semFloat8E5M2FNUZ = {
+constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
+constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
+constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
+constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
+constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
+constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
+constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
-static constexpr fltSemantics semFloat8E4M3FN = {
+constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
+constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
-static constexpr fltSemantics semFloat8E4M3FNUZ = {
+constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
+constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
-static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
-static constexpr fltSemantics semFloat8E8M0FNU = {127,
- -127,
- 1,
- 8,
- fltNonfiniteBehavior::NanOnly,
- fltNanEncoding::AllOnes,
- false,
- false,
- false};
-
-static constexpr fltSemantics semFloat6E3M2FN = {
+constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
+constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
+constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
+ 127,
+ -127,
+ 1,
+ 8,
+ fltNonfiniteBehavior::NanOnly,
+ fltNanEncoding::AllOnes,
+ false,
+ false,
+ false};
+
+constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
-static constexpr fltSemantics semFloat6E2M3FN = {
+constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
-static constexpr fltSemantics semFloat4E2M1FN = {
+constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
-static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
-static constexpr fltSemantics semBogus = {0, 0, 0, 0};
-static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
-static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
- 53 + 53, 128};
+constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
+ 80};
+constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
+constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
+constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
+ 1023, -1022 + 53, 53 + 53, 128};
const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
switch (S) {
@@ -261,36 +263,6 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
llvm_unreachable("Unknown floating semantics");
}
-const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
-const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
-const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
-const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
-const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
-const fltSemantics &APFloatBase::PPCDoubleDouble() {
- return semPPCDoubleDouble;
-}
-const fltSemantics &APFloatBase::PPCDoubleDoubleLegacy() {
- return semPPCDoubleDoubleLegacy;
-}
-const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
-const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
-const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
-const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
-const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
-const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
- return semFloat8E4M3B11FNUZ;
-}
-const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; }
-const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
-const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; }
-const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
-const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
-const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }
-const fltSemantics &APFloatBase::x87DoubleExtended() {
- return semX87DoubleExtended;
-}
-const fltSemantics &APFloatBase::Bogus() { return semBogus; }
-
bool APFloatBase::isRepresentableBy(const fltSemantics &A,
const fltSemantics &B) {
return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
@@ -1029,7 +1001,7 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
// For x87 extended precision, we want to make a NaN, not a
// pseudo-NaN. Maybe we should expose the ability to make
// pseudo-NaNs?
- if (semantics == &semX87DoubleExtended)
+ if (semantics == &APFloatBase::semX87DoubleExtended)
APInt::tcSetBit(significand, QNaNBit + 1);
}
@@ -1054,7 +1026,7 @@ IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
category = rhs.category;
sign = rhs.sign;
- rhs.semantics = &semBogus;
+ rhs.semantics = &APFloatBase::semBogus;
return *this;
}
@@ -1247,7 +1219,7 @@ IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
assign(rhs);
}
-IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
+IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
*this = std::move(rhs);
}
@@ -2607,8 +2579,8 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
shift = toSemantics.precision - fromSemantics.precision;
bool X86SpecialNan = false;
- if (&fromSemantics == &semX87DoubleExtended &&
- &toSemantics != &semX87DoubleExtended && category == fcNaN &&
+ if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
+ &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
(!(*significandParts() & 0x8000000000000000ULL) ||
!(*significandParts() & 0x4000000000000000ULL))) {
// x86 has some unusual NaNs which cannot be represented in any other
@@ -2694,7 +2666,7 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
// For x87 extended precision, we want to make a NaN, not a special NaN if
// the input wasn't special either.
- if (!X86SpecialNan && semantics == &semX87DoubleExtended)
+ if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
APInt::tcSetBit(significandParts(), semantics->precision - 1);
// Convert of sNaN creates qNaN and raises an exception (invalid op).
@@ -3530,7 +3502,8 @@ hash_code hash_value(const IEEEFloat &Arg) {
// the actual IEEE representations. We compensate for that here.
APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
+ assert(semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
assert(partCount()==2);
uint64_t myexponent, mysignificand;
@@ -3560,7 +3533,8 @@ APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
}
APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
+ assert(semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
assert(partCount()==2);
uint64_t words[2];
@@ -3574,14 +3548,14 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
// Declare fltSemantics before APFloat that uses it (and
// saves pointer to it) to ensure correct destruction order.
fltSemantics extendedSemantics = *semantics;
- extendedSemantics.minExponent = semIEEEdouble.minExponent;
+ extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
IEEEFloat extended(*this);
fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
IEEEFloat u(extended);
- fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
+ fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK || fs == opInexact);
(void)fs;
words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
@@ -3597,7 +3571,7 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
IEEEFloat v(extended);
v.subtract(u, rmNearestTiesToEven);
- fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
+ fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
@@ -3611,8 +3585,9 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
assert(semantics == &S);
- const int bias =
- (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
+ const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
+ ? -S.minExponent
+ : -(S.minExponent - 1);
constexpr unsigned int trailing_significand_bits = S.precision - 1;
constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
constexpr integerPart integer_bit =
@@ -3677,87 +3652,87 @@ APInt IEEEFloat::convertIEEEFloatToAPInt() const {
APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
assert(partCount() == 2);
- return convertIEEEFloatToAPInt<semIEEEquad>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
}
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
assert(partCount()==1);
- return convertIEEEFloatToAPInt<semIEEEdouble>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
}
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
assert(partCount()==1);
- return convertIEEEFloatToAPInt<semIEEEsingle>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
}
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semBFloat>();
+ return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
}
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
assert(partCount()==1);
- return convertIEEEFloatToAPInt<semIEEEhalf>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEhalf>();
}
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E5M2>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
}
APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
}
APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
}
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
}
APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
}
APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
}
APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E3M4>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
}
APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloatTF32>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
}
APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
}
APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
}
APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
}
APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
}
// This function creates an APInt that is just a bit map of the floating
@@ -3765,74 +3740,77 @@ APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
// and treating the result as a normal integer is unlikely to be useful.
APInt IEEEFloat::bitcastToAPInt() const {
- if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
return convertHalfAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semBFloat)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
return convertBFloatAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
return convertFloatAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
return convertDoubleAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
return convertQuadrupleAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
+ if (semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
return convertFloat8E5M2APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
return convertFloat8E5M2FNUZAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
return convertFloat8E4M3APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
return convertFloat8E4M3FNAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
return convertFloat8E4M3FNUZAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
+ if (semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
return convertFloat8E4M3B11FNUZAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
return convertFloat8E3M4APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
return convertFloatTF32APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
return convertFloat8E8M0FNUAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
return convertFloat6E3M2FNAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
return convertFloat6E2M3FNAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
return convertFloat4E2M1FNAPFloatToAPInt();
- assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
+ assert(semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
"unknown format!");
return convertF80LongDoubleAPFloatToAPInt();
}
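
For context, a minimal usage sketch of the dispatcher above, written against the public APFloat API rather than copied from this patch: bitcastToAPInt() and the (semantics, APInt) constructor are exact inverses for every supported format.

  #include "llvm/ADT/APFloat.h"
  #include <cassert>

  void roundTripBF16() {
    llvm::APFloat F(llvm::APFloat::BFloat(), "1.5");
    llvm::APInt Bits = F.bitcastToAPInt();          // 16-bit payload, 0x3FC0
    llvm::APFloat G(llvm::APFloat::BFloat(), Bits); // rebuild from raw bits
    assert(G.bitwiseIsEqual(F));                    // exact round trip
  }
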
float IEEEFloat::convertToFloat() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
+ assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
"Float semantics are not IEEEsingle");
APInt api = bitcastToAPInt();
return api.bitsToFloat();
}
double IEEEFloat::convertToDouble() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
+ assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
"Float semantics are not IEEEdouble");
APInt api = bitcastToAPInt();
return api.bitsToDouble();
@@ -3840,7 +3818,7 @@ double IEEEFloat::convertToDouble() const {
#ifdef HAS_IEE754_FLOAT128
float128 IEEEFloat::convertToQuad() const {
- assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
+ assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
"Float semantics are not IEEEquads");
APInt api = bitcastToAPInt();
return api.bitsToQuad();
@@ -3861,7 +3839,7 @@ void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
uint64_t mysignificand = i1;
uint8_t myintegerbit = mysignificand >> 63;
- initialize(&semX87DoubleExtended);
+ initialize(&APFloatBase::semX87DoubleExtended);
assert(partCount()==2);
sign = static_cast<unsigned int>(i2>>15);
@@ -3893,14 +3871,16 @@ void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
// Get the first double and convert to our format.
initFromDoubleAPInt(APInt(64, i1));
- fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
+ fs = convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
+ &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
// Unless we have a special case, add in second double.
if (isFiniteNonZero()) {
- IEEEFloat v(semIEEEdouble, APInt(64, i2));
- fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
+ IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
+ fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
+ &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
@@ -3918,7 +3898,7 @@ void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
uint64_t val = api.getRawData()[0];
uint64_t myexponent = (val & exponent_mask);
- initialize(&semFloat8E8M0FNU);
+ initialize(&APFloatBase::semFloat8E8M0FNU);
assert(partCount() == 1);
// This format has an unsigned representation only.
@@ -4025,109 +4005,109 @@ void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
}
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEquad>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
}
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEdouble>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
}
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEsingle>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
}
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
- initFromIEEEAPInt<semBFloat>(api);
+ initFromIEEEAPInt<APFloatBase::semBFloat>(api);
}
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEhalf>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
}
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E5M2>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
}
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
}
void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
}
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
}
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
}
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
}
void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E3M4>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
}
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
- initFromIEEEAPInt<semFloatTF32>(api);
+ initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
}
void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat6E3M2FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
}
void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat6E2M3FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
}
void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat4E2M1FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
}
/// Treat api as containing the bits of a floating point number.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
assert(api.getBitWidth() == Sem->sizeInBits);
- if (Sem == &semIEEEhalf)
+ if (Sem == &APFloatBase::semIEEEhalf)
return initFromHalfAPInt(api);
- if (Sem == &semBFloat)
+ if (Sem == &APFloatBase::semBFloat)
return initFromBFloatAPInt(api);
- if (Sem == &semIEEEsingle)
+ if (Sem == &APFloatBase::semIEEEsingle)
return initFromFloatAPInt(api);
- if (Sem == &semIEEEdouble)
+ if (Sem == &APFloatBase::semIEEEdouble)
return initFromDoubleAPInt(api);
- if (Sem == &semX87DoubleExtended)
+ if (Sem == &APFloatBase::semX87DoubleExtended)
return initFromF80LongDoubleAPInt(api);
- if (Sem == &semIEEEquad)
+ if (Sem == &APFloatBase::semIEEEquad)
return initFromQuadrupleAPInt(api);
- if (Sem == &semPPCDoubleDoubleLegacy)
+ if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
return initFromPPCDoubleDoubleLegacyAPInt(api);
- if (Sem == &semFloat8E5M2)
+ if (Sem == &APFloatBase::semFloat8E5M2)
return initFromFloat8E5M2APInt(api);
- if (Sem == &semFloat8E5M2FNUZ)
+ if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
return initFromFloat8E5M2FNUZAPInt(api);
- if (Sem == &semFloat8E4M3)
+ if (Sem == &APFloatBase::semFloat8E4M3)
return initFromFloat8E4M3APInt(api);
- if (Sem == &semFloat8E4M3FN)
+ if (Sem == &APFloatBase::semFloat8E4M3FN)
return initFromFloat8E4M3FNAPInt(api);
- if (Sem == &semFloat8E4M3FNUZ)
+ if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
return initFromFloat8E4M3FNUZAPInt(api);
- if (Sem == &semFloat8E4M3B11FNUZ)
+ if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
return initFromFloat8E4M3B11FNUZAPInt(api);
- if (Sem == &semFloat8E3M4)
+ if (Sem == &APFloatBase::semFloat8E3M4)
return initFromFloat8E3M4APInt(api);
- if (Sem == &semFloatTF32)
+ if (Sem == &APFloatBase::semFloatTF32)
return initFromFloatTF32APInt(api);
- if (Sem == &semFloat8E8M0FNU)
+ if (Sem == &APFloatBase::semFloat8E8M0FNU)
return initFromFloat8E8M0FNUAPInt(api);
- if (Sem == &semFloat6E3M2FN)
+ if (Sem == &APFloatBase::semFloat6E3M2FN)
return initFromFloat6E3M2FNAPInt(api);
- if (Sem == &semFloat6E2M3FN)
+ if (Sem == &APFloatBase::semFloat6E2M3FN)
return initFromFloat6E2M3FNAPInt(api);
- if (Sem == &semFloat4E2M1FN)
+ if (Sem == &APFloatBase::semFloat4E2M1FN)
return initFromFloat4E2M1FNAPInt(api);
llvm_unreachable("unsupported semantics");
@@ -4202,11 +4182,11 @@ IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
}
IEEEFloat::IEEEFloat(float f) {
- initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
+ initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
}
IEEEFloat::IEEEFloat(double d) {
- initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
+ initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
}
namespace {
@@ -4815,38 +4795,40 @@ IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
: Semantics(&S),
- Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
- assert(Semantics == &semPPCDoubleDouble);
+ Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
+ APFloat(APFloatBase::semIEEEdouble)}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
- : Semantics(&S),
- Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
- APFloat(semIEEEdouble, uninitialized)}) {
- assert(Semantics == &semPPCDoubleDouble);
+ : Semantics(&S), Floats(new APFloat[2]{
+ APFloat(APFloatBase::semIEEEdouble, uninitialized),
+ APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
- : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
- APFloat(semIEEEdouble)}) {
- assert(Semantics == &semPPCDoubleDouble);
+ : Semantics(&S),
+ Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
+ APFloat(APFloatBase::semIEEEdouble)}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
: Semantics(&S),
Floats(new APFloat[2]{
- APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
- APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
- assert(Semantics == &semPPCDoubleDouble);
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
APFloat &&Second)
: Semantics(&S),
Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
- assert(Semantics == &semPPCDoubleDouble);
- assert(&Floats[0].getSemantics() == &semIEEEdouble);
- assert(&Floats[1].getSemantics() == &semIEEEdouble);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
+ assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
}
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
@@ -4854,14 +4836,14 @@ DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
APFloat(RHS.Floats[1])}
: nullptr) {
- assert(Semantics == &semPPCDoubleDouble);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
: Semantics(RHS.Semantics), Floats(RHS.Floats) {
- RHS.Semantics = &semBogus;
+ RHS.Semantics = &APFloatBase::semBogus;
RHS.Floats = nullptr;
- assert(Semantics == &semPPCDoubleDouble);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
@@ -5009,12 +4991,12 @@ APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
CC(RHS.Floats[1]);
- assert(&A.getSemantics() == &semIEEEdouble);
- assert(&AA.getSemantics() == &semIEEEdouble);
- assert(&C.getSemantics() == &semIEEEdouble);
- assert(&CC.getSemantics() == &semIEEEdouble);
- assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
- assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
+ assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
return Out.addImpl(A, AA, C, CC, RM);
}
@@ -5119,28 +5101,32 @@ APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
APFloat::roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret =
- Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.divide(
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret =
- Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.remainder(
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.mod(
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
@@ -5148,17 +5134,21 @@ APFloat::opStatus
DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
const DoubleAPFloat &Addend,
APFloat::roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
auto Ret = Tmp.fusedMultiplyAdd(
- APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
- APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
+ Multiplicand.bitcastToAPInt()),
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
+ RM);
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
const APFloat &Hi = getFirst();
const APFloat &Lo = getSecond();
@@ -5309,22 +5299,28 @@ void DoubleAPFloat::makeZero(bool Neg) {
}
void DoubleAPFloat::makeLargest(bool Neg) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
- Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ Floats[0] =
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
+ Floats[1] =
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
if (Neg)
changeSign();
}
void DoubleAPFloat::makeSmallest(bool Neg) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
Floats[0].makeSmallest(Neg);
Floats[1].makeZero(/* Neg = */ false);
}
void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ Floats[0] =
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
if (Neg)
Floats[0].changeSign();
Floats[1].makeZero(/* Neg = */ false);
@@ -5355,7 +5351,8 @@ hash_code hash_value(const DoubleAPFloat &Arg) {
}
APInt DoubleAPFloat::bitcastToAPInt() const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
uint64_t Data[] = {
Floats[0].bitcastToAPInt().getRawData()[0],
Floats[1].bitcastToAPInt().getRawData()[0],
@@ -5365,10 +5362,11 @@ APInt DoubleAPFloat::bitcastToAPInt() const {
Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
auto Ret = Tmp.convertFromString(S, RM);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
@@ -5379,7 +5377,8 @@ Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
// nextUp must choose the smallest output > input that follows these rules.
// nextDown must choose the largest output < input that follows these rules.
APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
// nextDown(x) = -nextUp(-x)
if (nextDown) {
changeSign();
@@ -5481,7 +5480,8 @@ APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
roundingMode RM, bool *IsExact) const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
// If Hi is not finite, or Lo is zero, the value is entirely represented
// by Hi. Delegate to the simpler single-APFloat conversion.
@@ -5761,8 +5761,9 @@ unsigned int DoubleAPFloat::convertToHexString(char *DST,
unsigned int HexDigits,
bool UpperCase,
roundingMode RM) const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
.convertToHexString(DST, HexDigits, UpperCase, RM);
}
@@ -5799,7 +5800,8 @@ bool DoubleAPFloat::isLargest() const {
}
bool DoubleAPFloat::isInteger() const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
return Floats[0].isInteger() && Floats[1].isInteger();
}
@@ -5807,8 +5809,9 @@ void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
unsigned FormatPrecision,
unsigned FormatMaxPadding,
bool TruncateZero) const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
.toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
}
@@ -5840,14 +5843,17 @@ int ilogb(const DoubleAPFloat &Arg) {
DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
APFloat::roundingMode RM) {
- assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
+ assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
+ "Unexpected Semantics");
+ return DoubleAPFloat(APFloatBase::PPCDoubleDouble(),
+ scalbn(Arg.Floats[0], Exp, RM),
scalbn(Arg.Floats[1], Exp, RM));
}
DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
APFloat::roundingMode RM) {
- assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
+ "Unexpected Semantics");
// Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
// [1.0, 2.0).
@@ -5943,7 +5949,8 @@ DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
}
APFloat First = scalbn(Hi, -Exp, RM);
- return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
+ return DoubleAPFloat(APFloatBase::PPCDoubleDouble(), std::move(First),
+ std::move(Second));
}
} // namespace detail
@@ -5955,9 +5962,8 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
}
if (usesLayout<DoubleAPFloat>(Semantics)) {
const fltSemantics& S = F.getSemantics();
- new (&Double)
- DoubleAPFloat(Semantics, APFloat(std::move(F), S),
- APFloat(semIEEEdouble));
+ new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
+ APFloat(APFloatBase::IEEEdouble()));
return;
}
llvm_unreachable("Unexpected semantics");
@@ -6065,8 +6071,9 @@ APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
return U.IEEE.convert(ToSemantics, RM, losesInfo);
if (usesLayout<IEEEFloat>(getSemantics()) &&
usesLayout<DoubleAPFloat>(ToSemantics)) {
- assert(&ToSemantics == &semPPCDoubleDouble);
- auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
+ assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
+ auto Ret =
+ U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
*this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
return Ret;
}
@@ -6113,13 +6120,15 @@ APFloat::opStatus APFloat::convertToInteger(APSInt &result,
}
double APFloat::convertToDouble() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
+ if (&getSemantics() ==
+ (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
return getIEEE().convertToDouble();
assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
"Float semantics is not representable by IEEEdouble");
APFloat Temp = *this;
bool LosesInfo;
- opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
+ opStatus St =
+ Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
(void)St;
return Temp.getIEEE().convertToDouble();
@@ -6127,13 +6136,14 @@ double APFloat::convertToDouble() const {
#ifdef HAS_IEE754_FLOAT128
float128 APFloat::convertToQuad() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)
+ if (&getSemantics() == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
return getIEEE().convertToQuad();
assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
"Float semantics is not representable by IEEEquad");
APFloat Temp = *this;
bool LosesInfo;
- opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
+ opStatus St =
+ Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
(void)St;
return Temp.getIEEE().convertToQuad();
@@ -6141,18 +6151,84 @@ float128 APFloat::convertToQuad() const {
#endif
float APFloat::convertToFloat() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
+ if (&getSemantics() ==
+ (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
return getIEEE().convertToFloat();
assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
"Float semantics is not representable by IEEEsingle");
APFloat Temp = *this;
bool LosesInfo;
- opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
+ opStatus St =
+ Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
(void)St;
return Temp.getIEEE().convertToFloat();
}
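
A similar sketch for the widening path above (public API; every IEEEhalf value is exactly representable as an IEEEdouble, which is precisely what the isRepresentableBy assertion guarantees):

  #include "llvm/ADT/APFloat.h"
  #include <cassert>

  void halfToDouble() {
    llvm::APFloat H(llvm::APFloat::IEEEhalf(), "2.5");
    // Semantics are not IEEEdouble, so convertToDouble() goes through
    // convert(semIEEEdouble, rmNearestTiesToEven, ...); the conversion is
    // exact here, so the opInexact/LosesInfo asserts above cannot fire.
    double D = H.convertToDouble();
    assert(D == 2.5);
  }
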
+APFloat::Storage::~Storage() {
+ if (usesLayout<IEEEFloat>(*semantics)) {
+ IEEE.~IEEEFloat();
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*semantics)) {
+ Double.~DoubleAPFloat();
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::Storage::Storage(const APFloat::Storage &RHS) {
+ if (usesLayout<IEEEFloat>(*RHS.semantics)) {
+ new (this) IEEEFloat(RHS.IEEE);
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ new (this) DoubleAPFloat(RHS.Double);
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::Storage::Storage(APFloat::Storage &&RHS) {
+ if (usesLayout<IEEEFloat>(*RHS.semantics)) {
+ new (this) IEEEFloat(std::move(RHS.IEEE));
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ new (this) DoubleAPFloat(std::move(RHS.Double));
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
+ if (usesLayout<IEEEFloat>(*semantics) &&
+ usesLayout<IEEEFloat>(*RHS.semantics)) {
+ IEEE = RHS.IEEE;
+ } else if (usesLayout<DoubleAPFloat>(*semantics) &&
+ usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ Double = RHS.Double;
+ } else if (this != &RHS) {
+ this->~Storage();
+ new (this) Storage(RHS);
+ }
+ return *this;
+}
+
+APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
+ if (usesLayout<IEEEFloat>(*semantics) &&
+ usesLayout<IEEEFloat>(*RHS.semantics)) {
+ IEEE = std::move(RHS.IEEE);
+ } else if (usesLayout<DoubleAPFloat>(*semantics) &&
+ usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ Double = std::move(RHS.Double);
+ } else if (this != &RHS) {
+ this->~Storage();
+ new (this) Storage(std::move(RHS));
+ }
+ return *this;
+}
+
} // namespace llvm
#undef APFLOAT_DISPATCH_ON_SEMANTICS
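
The Storage special members added above follow the standard destroy-and-placement-new idiom for a union of non-trivial alternatives. A self-contained sketch of the same idiom with hypothetical members (illustration only, not APFloat code):

  #include <new>
  #include <string>
  #include <vector>

  struct Tagged {
    bool HoldsStr;
    union { std::string Str; std::vector<int> Vec; };
    Tagged() : HoldsStr(true), Str() {}
    Tagged(const Tagged &RHS) : HoldsStr(RHS.HoldsStr) {
      if (HoldsStr)
        new (&Str) std::string(RHS.Str);   // activate the matching member
      else
        new (&Vec) std::vector<int>(RHS.Vec);
    }
    Tagged &operator=(const Tagged &RHS) {
      if (HoldsStr && RHS.HoldsStr)
        Str = RHS.Str;                     // same active member: plain assign
      else if (!HoldsStr && !RHS.HoldsStr)
        Vec = RHS.Vec;
      else if (this != &RHS) {
        this->~Tagged();                   // active member differs: destroy,
        new (this) Tagged(RHS);            // then rebuild in place from RHS
      }
      return *this;
    }
    ~Tagged() {
      if (HoldsStr)
        Str.~basic_string();
      else
        Vec.~vector();
    }
  };
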
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 979a8b0..4b22c68 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include <algorithm>
+#include <array>
namespace llvm {
@@ -45,7 +46,7 @@ struct GCNRegPressure {
return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR];
}
- void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
+ void clear() { Value.fill(0); }
unsigned getNumRegs(RegKind Kind) const {
assert(Kind < TOTAL_KINDS);
@@ -127,9 +128,7 @@ struct GCNRegPressure {
bool less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
- bool operator==(const GCNRegPressure &O) const {
- return std::equal(&Value[0], &Value[ValueArraySize], O.Value);
- }
+ bool operator==(const GCNRegPressure &O) const { return Value == O.Value; }
bool operator!=(const GCNRegPressure &O) const {
return !(*this == O);
@@ -160,7 +159,7 @@ private:
/// Pressure for all register kinds (first all regular register kinds, then
/// all tuple register kinds).
- unsigned Value[ValueArraySize];
+ std::array<unsigned, ValueArraySize> Value;
static unsigned getRegKind(const TargetRegisterClass *RC,
const SIRegisterInfo *STI);
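
The std::array change is behavior-preserving but gives GCNRegPressure a member-wise operator== and a fill() member for free; a standalone illustration of the two replaced idioms:

  #include <array>
  #include <cassert>

  int main() {
    std::array<unsigned, 4> Value{1, 2, 3, 4};
    std::array<unsigned, 4> Other{1, 2, 3, 4};
    assert(Value == Other);   // element-wise compare, replacing std::equal
    Value.fill(0);            // replacing std::fill over raw pointers
    assert(Value == (std::array<unsigned, 4>{}));
    return 0;
  }
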
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 2aa54c9..09ef6ac 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -45,6 +45,9 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
+ // 32-bit ABS is legal on AMDGPU targets other than R600, so expand it here.
+ setOperationAction(ISD::ABS, MVT::i32, Expand);
+
// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
// spaces, so it is custom lowered to handle those where it isn't.
for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
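
Marking ISD::ABS as Expand hands it to the generic legalizer; one common branch-free fallback it can emit is the shift/add/xor sequence, sketched here in C++ (assuming arithmetic right shift of negative signed values, which the targets in question provide):

  #include <cstdint>

  int32_t abs_expanded(int32_t x) {
    int32_t s = x >> 31; // all ones if x is negative, otherwise zero
    return (x + s) ^ s;  // identity when s == 0; two's-complement negation
                         // when s == -1
  }
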
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 1b7cb9b..636e31c 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -699,7 +699,8 @@ public:
"Can't encode VTYPE for uninitialized or unknown");
if (TWiden != 0)
return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt);
- return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic,
+ AltFmt);
}
bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
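
For orientation, an illustrative re-encoding of the ratified vtype layout (vlmul in bits 2:0, vsew in 5:3, vta in bit 6, vma in bit 7), with the altfmt bit position assumed from the Zvfbfa proposal rather than taken from this patch; the authoritative implementation is RISCVVType::encodeVTYPE, whose signature differs:

  // Hypothetical standalone encoder; field values are the raw encoded bits.
  unsigned encodeVTypeSketch(unsigned VLMul, unsigned VSEW, bool TailAgnostic,
                             bool MaskAgnostic, bool AltFmt) {
    return (VLMul & 0x7) | ((VSEW & 0x7) << 3) |
           (TailAgnostic ? 1u << 6 : 0u) | (MaskAgnostic ? 1u << 7 : 0u) |
           (AltFmt ? 1u << 8 : 0u); // assumption: Zvfbfa places altfmt here
  }
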
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index ddb53a2..12f776b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3775,11 +3775,13 @@ std::string RISCVInstrInfo::createMIROperandComment(
#define CASE_VFMA_OPCODE_VV(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VV, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
#define CASE_VFMA_SPLATS(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VFPR16, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
// clang-format on
@@ -4003,11 +4005,13 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VV, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VFPR16, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
// clang-format on
@@ -4469,6 +4473,20 @@ bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const {
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32)
+
+#define CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP) \
+ CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16)
+
+#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16)
// clang-format on
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
@@ -4478,6 +4496,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return nullptr;
+ case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWADD_ALT_WV):
+ case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWSUB_ALT_WV):
case CASE_FP_WIDEOP_OPCODE_LMULS(FWADD_WV):
case CASE_FP_WIDEOP_OPCODE_LMULS(FWSUB_WV): {
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
@@ -4494,6 +4514,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
llvm_unreachable("Unexpected opcode");
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWADD_ALT_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWSUB_ALT_WV)
}
// clang-format on
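
For readability, assuming CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) pastes RISCV::PseudoV<OP>_<LMUL>_<SEW> as elsewhere in this file (an assumption worth checking against the surrounding definitions), the new _ALT macro expands to a plain E16-only case list:

  // `case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWADD_ALT_WV):` becomes, under the
  // assumed definition of the COMMON macro:
  //   case RISCV::PseudoVFWADD_ALT_WV_MF4_E16:
  //   case RISCV::PseudoVFWADD_ALT_WV_MF2_E16:
  //   case RISCV::PseudoVFWADD_ALT_WV_M1_E16:
  //   case RISCV::PseudoVFWADD_ALT_WV_M2_E16:
  //   case RISCV::PseudoVFWADD_ALT_WV_M4_E16:
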
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index c9c1246..9358486 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -44,6 +44,336 @@ let Predicates = [HasStdExtZvfbfmin] in {
let mayRaiseFPException = true, Predicates = [HasStdExtZvfbfwma] in
defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM;
+defset list<VTypeInfoToWide> AllWidenableIntToBF16Vectors = {
+ def : VTypeInfoToWide<VI8MF8, VBF16MF4>;
+ def : VTypeInfoToWide<VI8MF4, VBF16MF2>;
+ def : VTypeInfoToWide<VI8MF2, VBF16M1>;
+ def : VTypeInfoToWide<VI8M1, VBF16M2>;
+ def : VTypeInfoToWide<VI8M2, VBF16M4>;
+ def : VTypeInfoToWide<VI8M4, VBF16M8>;
+}
+
+multiclass VPseudoVALU_VV_VF_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>,
+ SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVALU_VF_RM_BF16 {
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFWALU_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFWALU_WV_WF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_WV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_WF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFMUL_VV_VF_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>,
+ SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVWMUL_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVMAC_VV_VF_AAXA_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoTernaryV_VV_AAXA_RM<m, 16/*sew*/>,
+ SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV", m.MX, 16/*sew*/>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, f.SEW>,
+ SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV", m.MX, f.SEW>;
+ }
+}
+
+multiclass VPseudoVWMAC_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoTernaryW_VV_RM<m, sew=16>,
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX, 16/*sew*/>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoTernaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
+ "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX, f.SEW>;
+ }
+}
+
+multiclass VPseudoVRCP_V_BF16 {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E16"
+ : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ def "_V_" # mx # "_E16_MASK"
+ : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ }
+ }
+}
+
+multiclass VPseudoVRCP_V_RM_BF16 {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E16"
+ : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ def "_V_" # mx # "_E16_MASK"
+ : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ }
+ }
+}
+
+multiclass VPseudoVMAX_VV_VF_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryV_VV<m, sew=16>,
+ SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV",
+ m.MX, 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF<m, f, f.SEW>,
+ SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF",
+ m.MX, f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVSGNJ_VV_VF_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryV_VV<m, sew=16>,
+ SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF<m, f, f.SEW>,
+ SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVWCVTF_V_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListW in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=8,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, 8/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVWCVTD_V_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=16,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVNCVTD_W_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, sew=16,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVNCVTD_W_RM_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m,
+ constraint, sew=16,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+let Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT in {
+let mayRaiseFPException = true in {
+defm PseudoVFADD_ALT : VPseudoVALU_VV_VF_RM_BF16;
+defm PseudoVFSUB_ALT : VPseudoVALU_VV_VF_RM_BF16;
+defm PseudoVFRSUB_ALT : VPseudoVALU_VF_RM_BF16;
+}
+
+let mayRaiseFPException = true in {
+defm PseudoVFWADD_ALT : VPseudoVFWALU_VV_VF_RM_BF16;
+defm PseudoVFWSUB_ALT : VPseudoVFWALU_VV_VF_RM_BF16;
+defm PseudoVFWADD_ALT : VPseudoVFWALU_WV_WF_RM_BF16;
+defm PseudoVFWSUB_ALT : VPseudoVFWALU_WV_WF_RM_BF16;
+}
+
+let mayRaiseFPException = true in
+defm PseudoVFMUL_ALT : VPseudoVFMUL_VV_VF_RM_BF16;
+
+let mayRaiseFPException = true in
+defm PseudoVFWMUL_ALT : VPseudoVWMUL_VV_VF_RM_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVFMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+}
+
+let mayRaiseFPException = true in {
+defm PseudoVFWMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWNMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWNMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+}
+
+let mayRaiseFPException = true in
+defm PseudoVFRSQRT7_ALT : VPseudoVRCP_V_BF16;
+
+let mayRaiseFPException = true in
+defm PseudoVFREC7_ALT : VPseudoVRCP_V_RM_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVFMIN_ALT : VPseudoVMAX_VV_VF_BF16;
+defm PseudoVFMAX_ALT : VPseudoVMAX_VV_VF_BF16;
+}
+
+defm PseudoVFSGNJ_ALT : VPseudoVSGNJ_VV_VF_BF16;
+defm PseudoVFSGNJN_ALT : VPseudoVSGNJ_VV_VF_BF16;
+defm PseudoVFSGNJX_ALT : VPseudoVSGNJ_VV_VF_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVMFEQ_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFNE_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLT_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLE_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFGT_ALT : VPseudoVCMPM_VF;
+defm PseudoVMFGE_ALT : VPseudoVCMPM_VF;
+}
+
+defm PseudoVFCLASS_ALT : VPseudoVCLS_V;
+
+defm PseudoVFMERGE_ALT : VPseudoVMRG_FM;
+
+defm PseudoVFMV_V_ALT : VPseudoVMV_F;
+
+let mayRaiseFPException = true in {
+defm PseudoVFWCVT_F_XU_ALT : VPseudoVWCVTF_V_BF16;
+defm PseudoVFWCVT_F_X_ALT : VPseudoVWCVTF_V_BF16;
+
+defm PseudoVFWCVT_F_F_ALT : VPseudoVWCVTD_V_BF16;
+} // mayRaiseFPException = true
+
+let mayRaiseFPException = true in {
+let hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFNCVT_XU_F_ALT : VPseudoVNCVTI_W_RM;
+defm PseudoVFNCVT_X_F_ALT : VPseudoVNCVTI_W_RM;
+}
+
+defm PseudoVFNCVT_RTZ_XU_F_ALT : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_RTZ_X_F_ALT : VPseudoVNCVTI_W;
+
+defm PseudoVFNCVT_F_F_ALT : VPseudoVNCVTD_W_RM_BF16;
+
+defm PseudoVFNCVT_ROD_F_F_ALT : VPseudoVNCVTD_W_BF16;
+} // mayRaiseFPException = true
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ defvar f = SCALAR_F16;
+ let HasSEWOp = 1, BaseInstr = VFMV_F_S in
+ def "PseudoVFMV_" # f.FX # "_S_ALT" :
+ RISCVVPseudo<(outs f.fprclass:$rd), (ins VR:$rs2, sew:$sew)>,
+ Sched<[WriteVMovFS, ReadVMovFS]>;
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1,
+ Constraints = "$rd = $passthru" in
+ def "PseudoVFMV_S_" # f.FX # "_ALT" :
+ RISCVVPseudo<(outs VR:$rd),
+ (ins VR:$passthru, f.fprclass:$rs1, AVL:$vl, sew:$sew)>,
+ Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>;
+}
+
+defm PseudoVFSLIDE1UP_ALT : VPseudoVSLD1_VF<"@earlyclobber $rd">;
+defm PseudoVFSLIDE1DOWN_ALT : VPseudoVSLD1_VF;
+} // Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT
+
//===----------------------------------------------------------------------===//
// Patterns
//===----------------------------------------------------------------------===//
@@ -108,6 +438,130 @@ let Predicates = [HasStdExtZvfbfmin] in {
FRM_DYN,
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
+
+ defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllBF16Vectors>;
+ defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
+ AllBF16Vectors, uimm5>;
+ defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
+ eew=16, vtilist=AllBF16Vectors>;
+ defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllBF16Vectors, uimm5>;
+ defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllBF16Vectors, uimm5>;
+
+ foreach fvti = AllBF16Vectors in {
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vmerge", "PseudoVMERGE", "VVM",
+ fvti.Vector,
+ fvti.Vector, fvti.Vector, fvti.Mask,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass,
+ fvti.RegClass, fvti.RegClass>;
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
+ "V"#fvti.ScalarSuffix#"M",
+ fvti.Vector,
+ fvti.Vector, fvti.Scalar, fvti.Mask,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass,
+ fvti.RegClass, fvti.ScalarRegClass>;
+ defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
+ def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$passthru),
+ (fvti.Vector fvti.RegClass:$rs2),
+ (fvti.Scalar (fpimm0)),
+ (fvti.Mask VMV0:$vm), VLOpFrag)),
+ (instr fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
+
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
+ fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector
+ (riscv_vrgather_vv_vl fvti.RegClass:$rs2,
+ (ivti.Vector fvti.RegClass:$rs1),
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VV_"# fvti.LMul.MX#"_E"# fvti.SEW#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (riscv_vrgather_vx_vl fvti.RegClass:$rs2, GPR:$rs1,
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VX_"# fvti.LMul.MX#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$rs1,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector
+ (riscv_vrgather_vx_vl fvti.RegClass:$rs2,
+ uimm5:$imm,
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# fvti.LMul.MX#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, uimm5:$imm,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
let Predicates = [HasStdExtZvfbfwma] in {
@@ -118,3 +572,224 @@ let Predicates = [HasStdExtZvfbfwma] in {
defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACCBF16",
AllWidenableBF16ToFloatVectors>;
}
+
+multiclass VPatConversionVI_VF_BF16<string intrinsic, string instruction> {
+ foreach fvti = AllBF16Vectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
+ fvti.LMul, ivti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionWF_VI_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
+ vti.LMul, fwti.RegClass, vti.RegClass, isSEWAware>;
+ }
+}
+
+multiclass VPatConversionWF_VF_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypeMinimalPredicates<fvti>.Predicates,
+ GetVTypeMinimalPredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
+ }
+}
+
+multiclass VPatConversionVI_WF_BF16<string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVI_WF_RM_BF16<string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionRoundingMode<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_WF_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass, isSEWAware>;
+ }
+}
+
+let Predicates = [HasStdExtZvfbfa] in {
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfadd", "PseudoVFADD_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfsub", "PseudoVFSUB_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryV_VX_RM<"int_riscv_vfrsub", "PseudoVFRSUB_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwadd", "PseudoVFWADD_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwsub", "PseudoVFWSUB_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwadd_w", "PseudoVFWADD_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwmul", "PseudoVFWMUL_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmacc", "PseudoVFWMACC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmacc", "PseudoVFWNMACC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfeq", "PseudoVMFEQ_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfle", "PseudoVMFLE_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmflt", "PseudoVMFLT_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfne", "PseudoVMFNE_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VX<"int_riscv_vmfgt", "PseudoVMFGT_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VX<"int_riscv_vmfge", "PseudoVMFGE_ALT", AllBF16Vectors>;
+defm : VPatBinarySwappedM_VV<"int_riscv_vmfgt", "PseudoVMFLT_ALT", AllBF16Vectors>;
+defm : VPatBinarySwappedM_VV<"int_riscv_vmfge", "PseudoVMFLE_ALT", AllBF16Vectors>;
+defm : VPatConversionVI_VF_BF16<"int_riscv_vfclass", "PseudoVFCLASS_ALT">;
+foreach vti = AllBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE_ALT",
+ "V"#vti.ScalarSuffix#"M",
+ vti.Vector,
+ vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+}
+defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU_ALT",
+ isSEWAware=1>;
+defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X_ALT",
+ isSEWAware=1>;
+defm : VPatConversionWF_VF_BF16<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F_ALT",
+ isSEWAware=1>;
+defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F_ALT">;
+defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F_ALT">;
+defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F_ALT">;
+defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F_ALT">;
+defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatConversionVF_WF_BF16<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F_ALT",
+ isSEWAware=1>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP_ALT", AllBF16Vectors>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN_ALT", AllBF16Vectors>;
+
+foreach fvti = AllBF16Vectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in {
+ // 13.16. Vector Floating-Point Move Instruction
+ // If we're splatting fpimm0, use vmv.v.i vd, 0.
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
+ $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX)
+ $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ }
+
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_V_ALT_" # fvti.ScalarSuffix # "_" #
+ fvti.LMul.MX)
+ $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ }
+}
+
+foreach vti = NoGroupBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ (vti.Scalar (fpimm0)),
+ VLOpFrag)),
+ (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ VLOpFrag)),
+ (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ vti.ScalarRegClass:$rs1,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_S_"#vti.ScalarSuffix#"_ALT")
+ vti.RegClass:$passthru,
+ (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
+ }
+
+ defvar vfmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
+ vti.ScalarSuffix,
+ "_S_ALT"));
+ // Only pattern-match extract-element operations where the index is 0. Any
+ // other index will have been custom-lowered to slide the vector correctly
+ // into place.
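+ // For example (illustrative, not part of this patch): extracting element 0
+ // of a bf16 vector maps directly to the vfmv.f.s form matched here, whereas
+ // a non-zero index %i is first materialized with a vslidedown.vx by %i and
+ // the result is then read from element 0.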
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)),
+ (vfmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>;
+}
+} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 6acf799..334db4b 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -288,9 +288,12 @@ public:
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
bool hasVInstructionsF16Minimal() const { return HasStdExtZvfhmin; }
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
- bool hasVInstructionsBF16Minimal() const { return HasStdExtZvfbfmin; }
+ bool hasVInstructionsBF16Minimal() const {
+ return HasStdExtZvfbfmin || HasStdExtZvfbfa;
+ }
bool hasVInstructionsF32() const { return HasStdExtZve32f; }
bool hasVInstructionsF64() const { return HasStdExtZve64d; }
+ bool hasVInstructionsBF16() const { return HasStdExtZvfbfa; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
bool hasVInstructionsFullMultiply() const { return HasStdExtV; }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2feee05..b05d7c7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44813,10 +44813,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
case X86ISD::PCMPGT:
// icmp sgt(0, R) == ashr(R, BitWidth-1).
- // iff we only need the sign bit then we can use R directly.
- if (OriginalDemandedBits.isSignMask() &&
- ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) {
+ // iff we only need the sign bit then we can use R directly.
+ if (OriginalDemandedBits.isSignMask())
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // Otherwise we just need R's sign bit for the comparison.
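+ // E.g. for v4i32, pcmpgt(0, R) is all-ones in exactly the lanes where R is
+ // negative, so only the sign bit of each lane of R is demanded.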
+ APInt SignMask = APInt::getSignMask(BitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(1), SignMask, OriginalDemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ }
break;
case X86ISD::MOVMSK: {
SDValue Src = Op.getOperand(0);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 09cb225..a8eb9b9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3757,6 +3757,10 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
// (x < y) ? -1 : zext(x > y)
// (x > y) ? 1 : sext(x != y)
// (x > y) ? 1 : sext(x < y)
+// (x == y) ? 0 : (x > y ? 1 : -1)
+// (x == y) ? 0 : (x < y ? -1 : 1)
+// Special case: x == C ? 0 : (x > C - 1 ? 1 : -1)
+// Special case: x == C ? 0 : (x < C + 1 ? -1 : 1)
// Into ucmp/scmp(x, y), where signedness is determined by the signedness
// of the comparison in the original sequence.
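+// A hypothetical worked instance of the first constant special case, assuming
+// a signed compare (values are illustrative):
+//   %eq = icmp eq i32 %x, 5
+//   %gt = icmp sgt i32 %x, 4
+//   %inner = select i1 %gt, i32 1, i32 -1
+//   %r = select i1 %eq, i32 0, i32 %inner
+// is folded to:
+//   %r = call i32 @llvm.scmp.i32.i32(i32 %x, i32 5)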
Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) {
@@ -3849,6 +3853,44 @@ Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) {
}
}
+ // Special cases with constants: x == C ? 0 : (x > C-1 ? 1 : -1)
+ if (Pred == ICmpInst::ICMP_EQ && match(TV, m_Zero())) {
+ const APInt *C;
+ if (match(RHS, m_APInt(C))) {
+ CmpPredicate InnerPred;
+ Value *InnerRHS;
+ const APInt *InnerTV, *InnerFV;
+ if (match(FV,
+ m_Select(m_ICmp(InnerPred, m_Specific(LHS), m_Value(InnerRHS)),
+ m_APInt(InnerTV), m_APInt(InnerFV)))) {
+
+ // x == C ? 0 : (x > C-1 ? 1 : -1)
+ if (ICmpInst::isGT(InnerPred) && InnerTV->isOne() &&
+ InnerFV->isAllOnes()) {
+ IsSigned = ICmpInst::isSigned(InnerPred);
+ bool CanSubOne = IsSigned ? !C->isMinSignedValue() : !C->isMinValue();
+ if (CanSubOne) {
+ APInt Cminus1 = *C - 1;
+ if (match(InnerRHS, m_SpecificInt(Cminus1)))
+ Replace = true;
+ }
+ }
+
+ // x == C ? 0 : (x < C+1 ? -1 : 1)
+ if (ICmpInst::isLT(InnerPred) && InnerTV->isAllOnes() &&
+ InnerFV->isOne()) {
+ IsSigned = ICmpInst::isSigned(InnerPred);
+ bool CanAddOne = IsSigned ? !C->isMaxSignedValue() : !C->isMaxValue();
+ if (CanAddOne) {
+ APInt Cplus1 = *C + 1;
+ if (match(InnerRHS, m_SpecificInt(Cplus1)))
+ Replace = true;
+ }
+ }
+ }
+ }
+ }
+
Intrinsic::ID IID = IsSigned ? Intrinsic::scmp : Intrinsic::ucmp;
if (Replace)
return replaceInstUsesWith(
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 9693ae6..4947d03 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -634,18 +635,10 @@ private:
/// Merge \p MergeWithV into \p IV and push \p V to the worklist, if \p IV
/// changes.
bool mergeInValue(ValueLatticeElement &IV, Value *V,
- ValueLatticeElement MergeWithV,
+ const ValueLatticeElement &MergeWithV,
ValueLatticeElement::MergeOptions Opts = {
/*MayIncludeUndef=*/false, /*CheckWiden=*/false});
- bool mergeInValue(Value *V, ValueLatticeElement MergeWithV,
- ValueLatticeElement::MergeOptions Opts = {
- /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) {
- assert(!V->getType()->isStructTy() &&
- "non-structs should use markConstant");
- return mergeInValue(ValueState[V], V, MergeWithV, Opts);
- }
-
/// getValueState - Return the ValueLatticeElement object that corresponds to
/// the value. This function handles the case when the value hasn't been seen
/// yet by properly seeding constants etc.
@@ -768,6 +761,7 @@ private:
void handleCallArguments(CallBase &CB);
void handleExtractOfWithOverflow(ExtractValueInst &EVI,
const WithOverflowInst *WO, unsigned Idx);
+ bool isInstFullyOverDefined(Instruction &Inst);
private:
friend class InstVisitor<SCCPInstVisitor>;
@@ -987,7 +981,7 @@ public:
void trackValueOfArgument(Argument *A) {
if (A->getType()->isStructTy())
return (void)markOverdefined(A);
- mergeInValue(A, getArgAttributeVL(A));
+ mergeInValue(ValueState[A], A, getArgAttributeVL(A));
}
bool isStructLatticeConstant(Function *F, StructType *STy);
@@ -1128,8 +1122,7 @@ bool SCCPInstVisitor::isStructLatticeConstant(Function *F, StructType *STy) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i));
assert(It != TrackedMultipleRetVals.end());
- ValueLatticeElement LV = It->second;
- if (!SCCPSolver::isConstant(LV))
+ if (!SCCPSolver::isConstant(It->second))
return false;
}
return true;
@@ -1160,7 +1153,7 @@ Constant *SCCPInstVisitor::getConstantOrNull(Value *V) const {
std::vector<Constant *> ConstVals;
auto *ST = cast<StructType>(V->getType());
for (unsigned I = 0, E = ST->getNumElements(); I != E; ++I) {
- ValueLatticeElement LV = LVs[I];
+ const ValueLatticeElement &LV = LVs[I];
ConstVals.push_back(SCCPSolver::isConstant(LV)
? getConstant(LV, ST->getElementType(I))
: UndefValue::get(ST->getElementType(I)));
@@ -1225,7 +1218,7 @@ void SCCPInstVisitor::visitInstruction(Instruction &I) {
}
bool SCCPInstVisitor::mergeInValue(ValueLatticeElement &IV, Value *V,
- ValueLatticeElement MergeWithV,
+ const ValueLatticeElement &MergeWithV,
ValueLatticeElement::MergeOptions Opts) {
if (IV.mergeIn(MergeWithV, Opts)) {
pushUsersToWorkList(V);
@@ -1264,7 +1257,7 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
return;
}
- ValueLatticeElement BCValue = getValueState(BI->getCondition());
+ const ValueLatticeElement &BCValue = getValueState(BI->getCondition());
ConstantInt *CI = getConstantInt(BCValue, BI->getCondition()->getType());
if (!CI) {
// Overdefined condition variables, and branches on unfoldable constant
@@ -1326,7 +1319,7 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
// the target as executable.
if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) {
// Casts are folded by visitCastInst.
- ValueLatticeElement IBRValue = getValueState(IBR->getAddress());
+ const ValueLatticeElement &IBRValue = getValueState(IBR->getAddress());
BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(
getConstant(IBRValue, IBR->getAddress()->getType()));
if (!Addr) { // Overdefined or unknown condition?
@@ -1383,49 +1376,66 @@ bool SCCPInstVisitor::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
// 7. If a conditional branch has a value that is overdefined, make all
// successors executable.
void SCCPInstVisitor::visitPHINode(PHINode &PN) {
- // If this PN returns a struct, just mark the result overdefined.
- // TODO: We could do a lot better than this if code actually uses this.
- if (PN.getType()->isStructTy())
- return (void)markOverdefined(&PN);
-
- if (getValueState(&PN).isOverdefined())
- return; // Quick exit
-
// Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
// and slow us down a lot. Just mark them overdefined.
if (PN.getNumIncomingValues() > 64)
return (void)markOverdefined(&PN);
- unsigned NumActiveIncoming = 0;
+ if (isInstFullyOverDefined(PN))
+ return;
+ SmallVector<unsigned> FeasibleIncomingIndices;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+ FeasibleIncomingIndices.push_back(i);
+ }
// Look at all of the executable operands of the PHI node. If any of them
// are overdefined, the PHI becomes overdefined as well. If they are all
// constant, and they agree with each other, the PHI becomes the identical
// constant. If they are constant and don't agree, the PHI is a constant
// range. If there are no executable operands, the PHI remains unknown.
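+ // For example, merging the constants 1 and 3 from two feasible edges yields
+ // the constant range [1, 4); a further overdefined incoming value would make
+ // the PHI overdefined.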
- ValueLatticeElement PhiState = getValueState(&PN);
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
- continue;
-
- ValueLatticeElement IV = getValueState(PN.getIncomingValue(i));
- PhiState.mergeIn(IV);
- NumActiveIncoming++;
- if (PhiState.isOverdefined())
- break;
+ if (StructType *STy = dyn_cast<StructType>(PN.getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ ValueLatticeElement PhiState = getStructValueState(&PN, i);
+ if (PhiState.isOverdefined())
+ continue;
+ for (unsigned j : FeasibleIncomingIndices) {
+ const ValueLatticeElement &IV =
+ getStructValueState(PN.getIncomingValue(j), i);
+ PhiState.mergeIn(IV);
+ if (PhiState.isOverdefined())
+ break;
+ }
+ ValueLatticeElement &PhiStateRef = getStructValueState(&PN, i);
+ mergeInValue(PhiStateRef, &PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ FeasibleIncomingIndices.size() + 1));
+ PhiStateRef.setNumRangeExtensions(
+ std::max((unsigned)FeasibleIncomingIndices.size(),
+ PhiStateRef.getNumRangeExtensions()));
+ }
+ } else {
+ ValueLatticeElement PhiState = getValueState(&PN);
+ for (unsigned i : FeasibleIncomingIndices) {
+ const ValueLatticeElement &IV = getValueState(PN.getIncomingValue(i));
+ PhiState.mergeIn(IV);
+ if (PhiState.isOverdefined())
+ break;
+ }
+ // We allow up to 1 range extension per active incoming value and one
+ // additional extension. Note that we manually adjust the number of range
+ // extensions to match the number of active incoming values. This helps to
+ // limit multiple extensions caused by the same incoming value, if other
+ // incoming values are equal.
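+ // E.g. with 3 feasible incoming values, the merge below permits at most 4
+ // range-widening steps before the lattice value is forced to overdefined.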
+ ValueLatticeElement &PhiStateRef = ValueState[&PN];
+ mergeInValue(PhiStateRef, &PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ FeasibleIncomingIndices.size() + 1));
+ PhiStateRef.setNumRangeExtensions(
+ std::max((unsigned)FeasibleIncomingIndices.size(),
+ PhiStateRef.getNumRangeExtensions()));
}
-
- // We allow up to 1 range extension per active incoming value and one
- // additional extension. Note that we manually adjust the number of range
- // extensions to match the number of active incoming values. This helps to
- // limit multiple extensions caused by the same incoming value, if other
- // incoming values are equal.
- mergeInValue(&PN, PhiState,
- ValueLatticeElement::MergeOptions().setMaxWidenSteps(
- NumActiveIncoming + 1));
- ValueLatticeElement &PhiStateRef = getValueState(&PN);
- PhiStateRef.setNumRangeExtensions(
- std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions()));
}
void SCCPInstVisitor::visitReturnInst(ReturnInst &I) {
@@ -1481,7 +1491,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
}
}
- ValueLatticeElement OpSt = getValueState(I.getOperand(0));
+ const ValueLatticeElement &OpSt = getValueState(I.getOperand(0));
if (OpSt.isUnknownOrUndef())
return;
@@ -1496,9 +1506,9 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
if (I.getDestTy()->isIntOrIntVectorTy() &&
I.getSrcTy()->isIntOrIntVectorTy() &&
I.getOpcode() != Instruction::BitCast) {
- auto &LV = getValueState(&I);
ConstantRange OpRange =
OpSt.asConstantRange(I.getSrcTy(), /*UndefAllowed=*/false);
+ auto &LV = getValueState(&I);
Type *DestTy = I.getDestTy();
ConstantRange Res = ConstantRange::getEmpty(DestTy->getScalarSizeInBits());
@@ -1516,19 +1526,24 @@ void SCCPInstVisitor::handleExtractOfWithOverflow(ExtractValueInst &EVI,
const WithOverflowInst *WO,
unsigned Idx) {
Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
- ValueLatticeElement L = getValueState(LHS);
- ValueLatticeElement R = getValueState(RHS);
+ Type *Ty = LHS->getType();
+
addAdditionalUser(LHS, &EVI);
addAdditionalUser(RHS, &EVI);
- if (L.isUnknownOrUndef() || R.isUnknownOrUndef())
- return; // Wait to resolve.
- Type *Ty = LHS->getType();
+ const ValueLatticeElement &L = getValueState(LHS);
+ if (L.isUnknownOrUndef())
+ return; // Wait to resolve.
ConstantRange LR = L.asConstantRange(Ty, /*UndefAllowed=*/false);
+
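+ // Note: fetch R only after LR has been computed from L; getValueState may
+ // insert into the map and would invalidate the earlier reference.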
+ const ValueLatticeElement &R = getValueState(RHS);
+ if (R.isUnknownOrUndef())
+ return; // Wait to resolve.
+
ConstantRange RR = R.asConstantRange(Ty, /*UndefAllowed=*/false);
if (Idx == 0) {
ConstantRange Res = LR.binaryOp(WO->getBinaryOp(), RR);
- mergeInValue(&EVI, ValueLatticeElement::getRange(Res));
+ mergeInValue(ValueState[&EVI], &EVI, ValueLatticeElement::getRange(Res));
} else {
assert(Idx == 1 && "Index can only be 0 or 1");
ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
@@ -1560,7 +1575,7 @@ void SCCPInstVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
if (auto *WO = dyn_cast<WithOverflowInst>(AggVal))
return handleExtractOfWithOverflow(EVI, WO, i);
ValueLatticeElement EltVal = getStructValueState(AggVal, i);
- mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ mergeInValue(ValueState[&EVI], &EVI, EltVal);
} else {
// Otherwise, must be extracting from an array.
return (void)markOverdefined(&EVI);
@@ -1616,14 +1631,18 @@ void SCCPInstVisitor::visitSelectInst(SelectInst &I) {
if (ValueState[&I].isOverdefined())
return (void)markOverdefined(&I);
- ValueLatticeElement CondValue = getValueState(I.getCondition());
+ const ValueLatticeElement &CondValue = getValueState(I.getCondition());
if (CondValue.isUnknownOrUndef())
return;
if (ConstantInt *CondCB =
getConstantInt(CondValue, I.getCondition()->getType())) {
Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
- mergeInValue(&I, getValueState(OpVal));
+ const ValueLatticeElement &OpValState = getValueState(OpVal);
+ // Safety: accessing ValueState[&I] cannot invalidate the OpValState
+ // reference, since &I is already present in the map and no insertion (and
+ // hence no rehashing) can occur.
+ assert(ValueState.contains(&I) && "&I is not in ValueState map.");
+ mergeInValue(ValueState[&I], &I, OpValState);
return;
}
@@ -1721,7 +1740,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
// being a special floating value.
ValueLatticeElement NewV;
NewV.markConstant(C, /*MayIncludeUndef=*/true);
- return (void)mergeInValue(&I, NewV);
+ return (void)mergeInValue(ValueState[&I], &I, NewV);
}
}
@@ -1741,7 +1760,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
R = A.overflowingBinaryOp(BO->getOpcode(), B, OBO->getNoWrapKind());
else
R = A.binaryOp(BO->getOpcode(), B);
- mergeInValue(&I, ValueLatticeElement::getRange(R));
+ mergeInValue(ValueState[&I], &I, ValueLatticeElement::getRange(R));
// TODO: Currently we do not exploit special values that produce something
// better than overdefined with an overdefined operand for vector or floating
@@ -1767,7 +1786,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) {
if (C) {
ValueLatticeElement CV;
CV.markConstant(C);
- mergeInValue(&I, CV);
+ mergeInValue(ValueState[&I], &I, CV);
return;
}
@@ -1802,7 +1821,7 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
Operands.reserve(I.getNumOperands());
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- ValueLatticeElement State = getValueState(I.getOperand(i));
+ const ValueLatticeElement &State = getValueState(I.getOperand(i));
if (State.isUnknownOrUndef())
return; // Operands are not resolved yet.
@@ -1881,14 +1900,13 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
if (ValueState[&I].isOverdefined())
return (void)markOverdefined(&I);
- ValueLatticeElement PtrVal = getValueState(I.getOperand(0));
+ const ValueLatticeElement &PtrVal = getValueState(I.getOperand(0));
if (PtrVal.isUnknownOrUndef())
return; // The pointer is not resolved yet!
- ValueLatticeElement &IV = ValueState[&I];
-
if (SCCPSolver::isConstant(PtrVal)) {
Constant *Ptr = getConstant(PtrVal, I.getOperand(0)->getType());
+ ValueLatticeElement &IV = ValueState[&I];
// load null is undefined.
if (isa<ConstantPointerNull>(Ptr)) {
@@ -1916,7 +1934,7 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
}
// Fall back to metadata.
- mergeInValue(&I, getValueFromMetadata(&I));
+ mergeInValue(ValueState[&I], &I, getValueFromMetadata(&I));
}
void SCCPInstVisitor::visitCallBase(CallBase &CB) {
@@ -1944,7 +1962,7 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
return markOverdefined(&CB); // Can't handle struct args.
if (A.get()->getType()->isMetadataTy())
continue; // Carried in CB, not allowed in Operands.
- ValueLatticeElement State = getValueState(A);
+ const ValueLatticeElement &State = getValueState(A);
if (State.isUnknownOrUndef())
return; // Operands are not resolved yet.
@@ -1964,7 +1982,7 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
}
// Fall back to metadata.
- mergeInValue(&CB, getValueFromMetadata(&CB));
+ mergeInValue(ValueState[&CB], &CB, getValueFromMetadata(&CB));
}
void SCCPInstVisitor::handleCallArguments(CallBase &CB) {
@@ -1992,10 +2010,11 @@ void SCCPInstVisitor::handleCallArguments(CallBase &CB) {
mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg,
getMaxWidenStepsOpts());
}
- } else
- mergeInValue(&*AI,
- getValueState(*CAI).intersect(getArgAttributeVL(&*AI)),
- getMaxWidenStepsOpts());
+ } else {
+ ValueLatticeElement CallArg =
+ getValueState(*CAI).intersect(getArgAttributeVL(&*AI));
+ mergeInValue(ValueState[&*AI], &*AI, CallArg, getMaxWidenStepsOpts());
+ }
}
}
}
@@ -2076,7 +2095,8 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
if (II->getIntrinsicID() == Intrinsic::vscale) {
unsigned BitWidth = CB.getType()->getScalarSizeInBits();
const ConstantRange Result = getVScaleRange(II->getFunction(), BitWidth);
- return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ return (void)mergeInValue(ValueState[II], II,
+ ValueLatticeElement::getRange(Result));
}
if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
@@ -2094,7 +2114,8 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
ConstantRange Result =
ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
- return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ return (void)mergeInValue(ValueState[II], II,
+ ValueLatticeElement::getRange(Result));
}
}
@@ -2121,10 +2142,25 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
return handleCallOverdefined(CB); // Not tracking this callee.
// If so, propagate the return value of the callee into this call result.
- mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts());
+ mergeInValue(ValueState[&CB], &CB, TFRVI->second, getMaxWidenStepsOpts());
}
}
+bool SCCPInstVisitor::isInstFullyOverDefined(Instruction &Inst) {
+ // For struct types, handle each member separately. A struct value is only
+ // considered overdefined once all of its members are overdefined.
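+ // E.g. a {i32, i32} value with one constant member and one overdefined
+ // member is not yet fully overdefined.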
+ if (StructType *STy = dyn_cast<StructType>(Inst.getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) {
+ if (!getStructValueState(&Inst, i).isOverdefined())
+ return false;
+ }
+ return true;
+ }
+
+ return getValueState(&Inst).isOverdefined();
+}
+
void SCCPInstVisitor::solve() {
// Process the work lists until they are empty!
while (!BBWorkList.empty() || !InstWorkList.empty()) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b62c8f1..9cd52da 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2242,8 +2242,49 @@ public:
/// may not be necessary.
bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, const int64_t Diff, Value *Ptr0,
- Value *PtrN, StridedPtrInfo &SPtrInfo) const;
+ Align Alignment, const int64_t Diff,
+ const size_t Sz) const;
+
+ /// Return true if an array of scalar loads can be replaced with a strided
+ /// load (with constant stride).
+ ///
+ /// TODO:
+ /// It is possible that the load gets "widened". Suppose that originally each
+ /// load loads `k` bytes and `PointerOps` can be arranged as follows (`%s` is
+ /// constant):
+ /// %b + 0 * %s + 0
+ /// %b + 0 * %s + 1
+ /// %b + 0 * %s + 2
+ /// ...
+ /// %b + 0 * %s + (w - 1)
+ ///
+ /// %b + 1 * %s + 0
+ /// %b + 1 * %s + 1
+ /// %b + 1 * %s + 2
+ /// ...
+ /// %b + 1 * %s + (w - 1)
+ /// ...
+ ///
+ /// %b + (n - 1) * %s + 0
+ /// %b + (n - 1) * %s + 1
+ /// %b + (n - 1) * %s + 2
+ /// ...
+ /// %b + (n - 1) * %s + (w - 1)
+ ///
+ /// In this case we will generate a strided load of type `<n x (k * w)>`.
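+ /// For instance (hypothetical numbers): n = 4 groups of w = 2 consecutive
+ /// i16 loads (k = 2 bytes each) would become one strided load of <4 x i32>.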
+ ///
+ /// \param PointerOps list of pointer arguments of loads.
+ /// \param ElemTy original scalar type of loads.
+ /// \param Alignment alignment of the first load.
+ /// \param SortedIndices is the order of PointerOps as returned by
+ /// `sortPtrAccesses`.
+ /// \param Diff Pointer difference between the lowest and the highest pointer
+ /// in `PointerOps` as returned by `getPointersDiff`.
+ /// \param Ptr0 first pointer in `PointerOps`.
+ /// \param PtrN last pointer in `PointerOps`.
+ /// \param SPtrInfo If the function returns `true`, it also sets all the
+ /// fields of `SPtrInfo` necessary to generate the strided load later.
+ bool analyzeConstantStrideCandidate(
+ const ArrayRef<Value *> PointerOps, Type *ElemTy, Align Alignment,
+ const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff,
+ Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const;
/// Return true if an array of scalar loads can be replaced with a strided
/// load (with run-time stride).
@@ -6849,9 +6890,8 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
/// current graph (for masked gathers extra extractelement instructions
/// might be required).
bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, const int64_t Diff, Value *Ptr0,
- Value *PtrN, StridedPtrInfo &SPtrInfo) const {
- const size_t Sz = PointerOps.size();
+ Align Alignment, const int64_t Diff,
+ const size_t Sz) const {
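+ // The first and last pointers are (Sz - 1) strides apart, so a common
+ // constant stride exists only if Diff is divisible by (Sz - 1).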
if (Diff % (Sz - 1) != 0)
return false;
@@ -6875,27 +6915,40 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
return false;
if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
return false;
+ return true;
+ }
+ return false;
+}
- // Iterate through all pointers and check if all distances are
- // unique multiple of Dist.
- SmallSet<int64_t, 4> Dists;
- for (Value *Ptr : PointerOps) {
- int64_t Dist = 0;
- if (Ptr == PtrN)
- Dist = Diff;
- else if (Ptr != Ptr0)
- Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
- // If the strides are not the same or repeated, we can't
- // vectorize.
- if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
- break;
- }
- if (Dists.size() == Sz) {
- Type *StrideTy = DL->getIndexType(Ptr0->getType());
- SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
- SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
- return true;
- }
+bool BoUpSLP::analyzeConstantStrideCandidate(
+ const ArrayRef<Value *> PointerOps, Type *ScalarTy, Align Alignment,
+ const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff,
+ Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+ const size_t Sz = PointerOps.size();
+ if (!isStridedLoad(PointerOps, ScalarTy, Alignment, Diff, Sz))
+ return false;
+
+ int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
+
+ // Iterate through all pointers and check that all distances are unique
+ // multiples of Stride.
+ SmallSet<int64_t, 4> Dists;
+ for (Value *Ptr : PointerOps) {
+ int64_t Dist = 0;
+ if (Ptr == PtrN)
+ Dist = Diff;
+ else if (Ptr != Ptr0)
+ Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
+ // If the strides are not the same or repeated, we can't
+ // vectorize.
+ if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
+ break;
+ }
+ if (Dists.size() == Sz) {
+ Type *StrideTy = DL->getIndexType(Ptr0->getType());
+ SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
+ SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
+ return true;
}
return false;
}
@@ -6995,8 +7048,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
Align Alignment =
cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
->getAlign();
- if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
- SPtrInfo))
+ if (analyzeConstantStrideCandidate(PointerOps, ScalarTy, Alignment, Order,
+ *Diff, Ptr0, PtrN, SPtrInfo))
return LoadsState::StridedVectorize;
}
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0e0b042..84d2ea6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -407,6 +407,10 @@ public:
VPBasicBlock *getParent() { return Parent; }
const VPBasicBlock *getParent() const { return Parent; }
+ /// \return the VPRegionBlock which the recipe belongs to.
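+ /// Returns nullptr if the parent basic block is not enclosed in a region.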
+ VPRegionBlock *getRegion();
+ const VPRegionBlock *getRegion() const;
+
/// The method which generates the output IR instructions that correspond to
/// this VPRecipe, thereby "executing" the VPlan.
virtual void execute(VPTransformState &State) = 0;
@@ -4075,6 +4079,14 @@ public:
}
};
+inline VPRegionBlock *VPRecipeBase::getRegion() {
+ return getParent()->getParent();
+}
+
+inline const VPRegionBlock *VPRecipeBase::getRegion() const {
+ return getParent()->getParent();
+}
+
/// VPlan models a candidate for vectorization, encoding various decisions taken
/// to produce efficient output IR, including which branches, basic-blocks and
/// output IR instructions to generate, and their cost. VPlan holds a
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index f413c63..7e074c1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -377,7 +377,7 @@ bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
#ifndef NDEBUG
auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
- auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
+ VPRegionBlock *Region = R->getRegion();
if (Region && Region->isReplicator()) {
assert(Region->getNumSuccessors() == 1 &&
Region->getNumPredecessors() == 1 && "Expected SESE region!");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7a98c75..d1e67e6b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2352,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
return false;
auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
- auto *CanIV = getParent()->getParent()->getCanonicalIV();
+ auto *CanIV = getRegion()->getCanonicalIV();
return StartC && StartC->isZero() && StepC && StepC->isOne() &&
getScalarType() == CanIV->getScalarType();
}
@@ -3076,7 +3076,7 @@ static void scalarizeInstruction(const Instruction *Instr,
State.AC->registerAssumption(II);
assert(
- (RepRecipe->getParent()->getParent() ||
+ (RepRecipe->getRegion() ||
!RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
all_of(RepRecipe->operands(),
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
@@ -3268,7 +3268,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
to_vector(operands()), VF);
// If the recipe is not predicated (i.e. not in a replicate region), return
// the scalar cost. Otherwise handle predicated cost.
- if (!getParent()->getParent()->isReplicator())
+ if (!getRegion()->isReplicator())
return ScalarCost;
// Account for the phi nodes that we will create.
@@ -3284,7 +3284,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
case Instruction::Store: {
// TODO: See getMemInstScalarizationCost for how to handle replicating and
// predicated cases.
- const VPRegionBlock *ParentRegion = getParent()->getParent();
+ const VPRegionBlock *ParentRegion = getRegion();
if (ParentRegion && ParentRegion->isReplicator())
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cae9aee8..f5f616f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1858,8 +1858,8 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
return nullptr;
VPRegionBlock *EnclosingLoopRegion =
HoistCandidate->getParent()->getEnclosingLoopRegion();
- assert((!HoistCandidate->getParent()->getParent() ||
- HoistCandidate->getParent()->getParent() == EnclosingLoopRegion) &&
+ assert((!HoistCandidate->getRegion() ||
+ HoistCandidate->getRegion() == EnclosingLoopRegion) &&
"CFG in VPlan should still be flat, without replicate regions");
// Hoist candidate was already visited, no need to hoist.
if (!Visited.insert(HoistCandidate).second)
@@ -2898,7 +2898,7 @@ void VPlanTransforms::replaceSymbolicStrides(
// evolution.
auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
auto *R = cast<VPRecipeBase>(&U);
- return R->getParent()->getParent() ||
+ return R->getRegion() ||
R->getParent() == Plan.getVectorLoopRegion()->getSinglePredecessor();
};
ValueToSCEVMapTy RewriteMap;
@@ -3803,8 +3803,7 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
continue;
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
- VPRegionBlock *ParentRegion =
- cast<VPRecipeBase>(U)->getParent()->getParent();
+ VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion();
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
};
if ((isa<VPReplicateRecipe>(DefR) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index cf95ac0..9a2497e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -64,7 +64,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
return true;
if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
- const VPRegionBlock *RegionOfR = Rep->getParent()->getParent();
+ const VPRegionBlock *RegionOfR = Rep->getRegion();
// Don't consider recipes in replicate regions as uniform yet; their first
// lane cannot be accessed when executing the replicate region for other
// lanes.
diff --git a/llvm/test/CodeGen/AMDGPU/abs_i32.ll b/llvm/test/CodeGen/AMDGPU/abs_i32.ll
new file mode 100644
index 0000000..b53047f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/abs_i32.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefixes=R600 %s
+
+define amdgpu_kernel void @abs_v1(ptr addrspace(1) %out, i32 %arg) {
+; GFX9-LABEL: abs_v1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_abs_i32 s2, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; R600-LABEL: abs_v1:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: MOV * T0.W, KC0[2].Z,
+; R600-NEXT: SUB_INT * T1.W, 0.0, PV.W,
+; R600-NEXT: MAX_INT T0.X, T0.W, PV.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
+ store i32 %res, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @abs_v2(ptr addrspace(1) %out, i32 %arg) {
+; GFX9-LABEL: abs_v2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_abs_i32 s2, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; R600-LABEL: abs_v2:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z,
+; R600-NEXT: MAX_INT T0.X, KC0[2].Z, PV.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %neg = sub i32 0, %arg
+ %cond = icmp sgt i32 %arg, %neg
+ %res = select i1 %cond, i32 %arg, i32 %neg
+ store i32 %res, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @abs_v3(ptr addrspace(1) %out, i32 %arg) {
+; GFX9-LABEL: abs_v3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_abs_i32 s2, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; R600-LABEL: abs_v3:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z,
+; R600-NEXT: MAX_INT T0.X, PV.W, KC0[2].Z,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %neg = sub i32 0, %arg
+ %cond = icmp sgt i32 %neg, %arg
+ %res = select i1 %cond, i32 %neg, i32 %arg
+ store i32 %res, ptr addrspace(1) %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll b/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll
new file mode 100644
index 0000000..489323b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ iXLen, iXLen);
+
+declare <vscale x 1 x i32> @llvm.riscv.vadd.nxv1i32.nxv1i32(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i32>,
+ <vscale x 1 x i32>,
+ iXLen);
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @test_half_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_half_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vse16.v v10, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half> poison,
+ <vscale x 1 x half> %3,
+ <vscale x 1 x half> %4,
+ iXLen 0, iXLen %2)
+
+ %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ call void @llvm.riscv.vse(<vscale x 1 x half> %a, ptr %ptr, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %b
+}
+
+define <vscale x 1 x bfloat> @test_i32_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x i32> %3, <vscale x 1 x i32> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_i32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vadd.vv v10, v10, v11
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vadd.nxv1i32.nxv1i32(
+ <vscale x 1 x i32> poison,
+ <vscale x 1 x i32> %3,
+ <vscale x 1 x i32> %4,
+ iXLen %2)
+
+ %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ call void @llvm.riscv.vse(<vscale x 1 x i32> %a, ptr %ptr, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %b
+}
+
+define <vscale x 1 x bfloat> @test_half_bf16_half(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_half_bf16_half:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v11
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v9, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half> poison,
+ <vscale x 1 x half> %3,
+ <vscale x 1 x half> %4,
+ iXLen 0, iXLen %2)
+
+ %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ %c = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half> poison,
+ <vscale x 1 x half> %a,
+ <vscale x 1 x half> %4,
+ iXLen 0, iXLen %2)
+
+ store <vscale x 1 x half> %c, ptr %ptr
+
+ ret <vscale x 1 x bfloat> %b
+}
+
+define <vscale x 1 x bfloat> @test_bf16_half_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_bf16_half_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vse16.v v10, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ %b = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half> poison,
+ <vscale x 1 x half> %3,
+ <vscale x 1 x half> %4,
+ iXLen 0, iXLen %2)
+
+ %c = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %a,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ store <vscale x 1 x half> %b, ptr %ptr
+
+ ret <vscale x 1 x bfloat> %c
+}
+
+define <vscale x 1 x bfloat> @test_bf16_i16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x i16> %3, <vscale x 1 x i16> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_bf16_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vadd.vv v9, v10, v11
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vse16.v v9, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ %b = call <vscale x 1 x i16> @llvm.riscv.vadd.nxv1i16.nxv1i16(
+ <vscale x 1 x i16> poison,
+ <vscale x 1 x i16> %3,
+ <vscale x 1 x i16> %4,
+ iXLen %2)
+
+ store <vscale x 1 x i16> %b, ptr %ptr
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll
new file mode 100644
index 0000000..db1b081
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll
@@ -0,0 +1,607 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfadd_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfadd_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfadd_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfadd_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfadd_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfadd_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfadd_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfadd_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfadd_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfadd_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfadd_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfadd_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfadd_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfadd_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll
new file mode 100644
index 0000000..d7d49b3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
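+; Unlike the arithmetic intrinsics in this series, vfclass does not round,
+; so it carries no frm operand and no fsrmi/fsrm pair appears in the
+; generated code. The masked form's trailing operands are (vl, policy);
+; policy 0 requests tail-undisturbed, mask-undisturbed, which matches the
+; "tu, mu" in the vsetvli check lines below.
+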
+declare <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16.nxv1bf16(
+ <vscale x 1 x i16>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vfclass_v_nxv1i16_nxv1bf16(
+  <vscale x 1 x bfloat> %0,
+  iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_v_nxv1i16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16.nxv1bf16(
+ <vscale x 1 x i16> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16.nxv1bf16(
+ <vscale x 1 x i16>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vfclass_mask_v_nxv1i16_nxv1bf16(
+  <vscale x 1 x i16> %0,
+  <vscale x 1 x bfloat> %1,
+  <vscale x 1 x i1> %2,
+  iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfclass.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16.nxv1bf16(
+ <vscale x 1 x i16> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16.nxv2bf16(
+ <vscale x 2 x i16>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vfclass_v_nxv2i16_nxv2bf16(
+  <vscale x 2 x bfloat> %0,
+  iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_v_nxv2i16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16.nxv2bf16(
+ <vscale x 2 x i16> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16.nxv2bf16(
+ <vscale x 2 x i16>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vfclass_mask_v_nxv2i16_nxv2bf16(
+  <vscale x 2 x i16> %0,
+  <vscale x 2 x bfloat> %1,
+  <vscale x 2 x i1> %2,
+  iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfclass.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16.nxv2bf16(
+ <vscale x 2 x i16> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16.nxv4bf16(
+ <vscale x 4 x i16>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i16> @intrinsic_vfclass_v_nxv4i16_nxv4bf16(
+  <vscale x 4 x bfloat> %0,
+  iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_v_nxv4i16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16.nxv4bf16(
+ <vscale x 4 x i16> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16.nxv4bf16(
+ <vscale x 4 x i16>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i16> @intrinsic_vfclass_mask_v_nxv4i16_nxv4bf16(
+  <vscale x 4 x i16> %0,
+  <vscale x 4 x bfloat> %1,
+  <vscale x 4 x i1> %2,
+  iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfclass.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16.nxv4bf16(
+ <vscale x 4 x i16> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16.nxv8bf16(
+ <vscale x 8 x i16>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i16> @intrinsic_vfclass_v_nxv8i16_nxv8bf16(
+  <vscale x 8 x bfloat> %0,
+  iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_v_nxv8i16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16.nxv8bf16(
+ <vscale x 8 x i16> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16.nxv8bf16(
+ <vscale x 8 x i16>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i16> @intrinsic_vfclass_mask_v_nxv8i16_nxv8bf16(
+  <vscale x 8 x i16> %0,
+  <vscale x 8 x bfloat> %1,
+  <vscale x 8 x i1> %2,
+  iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfclass.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16.nxv8bf16(
+ <vscale x 8 x i16> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16.nxv16bf16(
+ <vscale x 16 x i16>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i16> @intrinsic_vfclass_v_nxv16i16_nxv16bf16(
+  <vscale x 16 x bfloat> %0,
+  iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_v_nxv16i16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16.nxv16bf16(
+ <vscale x 16 x i16> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16.nxv16bf16(
+ <vscale x 16 x i16>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i16> @intrinsic_vfclass_mask_v_nxv16i16_nxv16bf16(
+  <vscale x 16 x i16> %0,
+  <vscale x 16 x bfloat> %1,
+  <vscale x 16 x i1> %2,
+  iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfclass.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16.nxv16bf16(
+ <vscale x 16 x i16> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16.nxv32bf16(
+ <vscale x 32 x i16>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vfclass_v_nxv32i16_nxv32bf16(
+  <vscale x 32 x bfloat> %0,
+  iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_v_nxv32i16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16.nxv32bf16(
+ <vscale x 32 x i16> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16.nxv32bf16(
+ <vscale x 32 x i16>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vfclass_mask_v_nxv32i16_nxv32bf16(
+  <vscale x 32 x i16> %0,
+  <vscale x 32 x bfloat> %1,
+  <vscale x 32 x i1> %2,
+  iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv32i16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, tu, mu
+; CHECK-NEXT: vfclass.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16.nxv32bf16(
+ <vscale x 32 x i16> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 32 x i16> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll
new file mode 100644
index 0000000..13821d7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
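+; The trailing iXLen operands of these fused multiply-add intrinsics are
+; (frm, vl, policy). A static frm of 0 (rne) makes llc set the rounding
+; mode with fsrmi and restore the caller's mode with fsrm; the _commute
+; tests at the end of this file pass frm 7 (dyn) instead, so no
+; fsrmi/fsrm bracketing is expected there.
+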
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmacc_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmacc_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmacc_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmacc_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
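+; The commute tests below pass the accumulator in a different argument
+; slot; instruction selection is expected to swap the multiplicands or
+; switch between vfmacc and vfmadd so that the result stays in the
+; register tied to v8.
+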
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll
new file mode 100644
index 0000000..09fc199
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+  iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+  iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+  iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+  iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+  iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+  iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+  iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+  iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+  iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmacc.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll
new file mode 100644
index 0000000..a337d30
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll
new file mode 100644
index 0000000..86ba7c7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmerge_vfm_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmerge_vfm_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmerge_vfm_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmerge_vfm_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmerge_vfm_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmerge_vfm_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmerge_vzm_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 1 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vfmerge_vzm_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 2 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vfmerge_vzm_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 4 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vfmerge_vzm_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 8 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vfmerge_vzm_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 16 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vfmerge_vzm_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 32 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll
new file mode 100644
index 0000000..37c0cf5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll
new file mode 100644
index 0000000..948d219
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsac_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsac_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsac_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsac_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsac_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsac_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsac_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsac_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
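+; The commute tests below pass the tied accumulator operand of vfmsac in a
+; different argument position. Rounding-mode operand 7 (dynamic) means no
+; fsrmi/fsrm pair is emitted, and the backend picks the vfmsub form (or swaps
+; the multiplicands) so the result still lands in v8.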
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsub.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsub.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll
new file mode 100644
index 0000000..6838f37
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
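+; The RUN lines materialize iXLen as i32 or i64 with sed before invoking llc.
+; A static rounding-mode operand (iXLen 0 = rne) is expected to produce an
+; fsrmi/fsrm pair: fsrmi a1, 0 saves the old frm value in a1 and fsrm a1
+; restores it afterwards.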
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsub_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsub_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsub_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsub_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsub_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsub_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsub_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsub_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
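+; Commute tests: with rounding-mode operand 7 (dynamic) the fsrmi/fsrm pair
+; disappears, and vfmsub/vfmsac are selected interchangeably depending on
+; which operand is tied to the destination register v8.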
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsac.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll
new file mode 100644
index 0000000..44bce72
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll
@@ -0,0 +1,607 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
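+; Note the different intrinsic shape for vfmul: the unmasked form carries an
+; explicit passthru operand (poison here) plus (rm, vl) trailing scalars,
+; while the masked form adds a mask and a trailing policy operand (1 here,
+; presumably tail-agnostic given the "ta, mu" vsetvli in the checks).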
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmul_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmul_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmul_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmul_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmul_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmul_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmul_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmul_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
new file mode 100644
index 0000000..fbc73119
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+experimental-zvfbfa -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+experimental-zvfbfa -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s
+
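+; The checks show the bf16 scalar read going through vmv.x.s plus fmv.h.x
+; (a 16-bit bit-pattern move into fa0) rather than a dedicated bf16 vfmv.f.s,
+; so a plain e16 vsetivli is sufficient.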
+declare bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv1bf16(<vscale x 1 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv2bf16(<vscale x 2 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv4bf16(<vscale x 4 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv8bf16(<vscale x 8 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv16bf16(<vscale x 16 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv32bf16(<vscale x 32 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat> %0)
+ ret bfloat %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll
new file mode 100644
index 0000000..a810809
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s --check-prefixes=CHECK
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat>, bfloat, iXLen)
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat>, bfloat, iXLen)
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.s.f_f_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat>, bfloat, iXLen)
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.s.f_f_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat>, bfloat, iXLen)
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.s.f_f_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat>, bfloat, iXLen)
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.s.f_f_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat>, bfloat, iXLen)
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.s.f_f_nxv32bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_nxv1bf16_negzero(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1bf16_negzero:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat -0.0, iXLen %1)
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll
new file mode 100644
index 0000000..f3293dd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
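+; vfmv.v.f splats a scalar bfloat with e16alt; splats of 0.0 fold to the
+; integer immediate splat vmv.v.i.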
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_f_nxv1bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.v.f_f_nxv2bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.v.f_f_nxv4bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.v.f_f_nxv8bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.v.f_f_nxv16bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.v.f_f_nxv32bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_zero_nxv1bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_zero_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vmv.v.i_zero_nxv2bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vmv.v.i_zero_nxv4bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vmv.v.i_zero_nxv8bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vmv.v.i_zero_nxv16bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vmv.v.i_zero_nxv32bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll
new file mode 100644
index 0000000..7d587fd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
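+; vfncvt.rod.f.f.w narrows f32 to bfloat with round-towards-odd encoded in
+; the instruction itself, so no frm save/restore is expected in the checks.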
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x float>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x float>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll
new file mode 100644
index 0000000..ee9e3d1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
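+; vfncvt.rtz.x.f.w converts bfloat to signed i8 with the statically encoded
+; round-towards-zero mode, so no frm save/restore is expected.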
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll
new file mode 100644
index 0000000..521f727
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
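+; vfncvt.rtz.xu.f.w converts bfloat to unsigned i8 with the statically
+; encoded round-towards-zero mode, so no frm save/restore is expected.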
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll
new file mode 100644
index 0000000..ab9ebad
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
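+; vfncvt.x.f.w takes a dynamic rounding-mode operand (iXLen 0 = RNE), so each
+; test expects an fsrmi/fsrm pair around the conversion.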
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v10, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v12, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v16, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll
new file mode 100644
index 0000000..61c6803
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
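+; vfncvt.xu.f.w converts bfloat to unsigned i8 with a dynamic rounding-mode
+; operand (iXLen 0 = RNE), materialized as an fsrmi/fsrm pair.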
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v10, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v12, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v16, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll
new file mode 100644
index 0000000..4b4091b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll
new file mode 100644
index 0000000..2bb6bf5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmacc.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll
new file mode 100644
index 0000000..cfbaafa
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsac_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsac_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsac_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsac_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
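+; The _commute tests below pass the tied operand in a different position and
+; expect codegen to fold this by switching between vfnmsac and vfnmsub,
+; keeping the result in v8 with no extra register move.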
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsub.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsub.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll
new file mode 100644
index 0000000..5ebbb90c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
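+; The trailing iXLen operands follow the usual RVV FP-intrinsic convention:
+; a static rounding mode (0 = rne, materialized with an fsrmi/fsrm pair;
+; 7 = use the dynamic frm, so no save/restore is emitted), the AVL, and the
+; tail/mask policy (0 keeps the tail undisturbed, 3 is fully agnostic).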
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsub_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsub_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsub_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsub_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
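+; The _commute tests below tie a different argument to the destination; the
+; backend is expected to commute into vfnmsub or vfnmsac as needed so the
+; result stays in v8 without an extra move.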
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsac.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll
new file mode 100644
index 0000000..1211415
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
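+; vfrec7.v is the 7-bit-accurate reciprocal estimate. The unmasked intrinsic
+; takes (passthru, source, rounding mode, AVL); the masked form adds the mask
+; and a policy operand. Passing poison as the passthru gives the plain ta,ma
+; lowering checked below.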
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrec7.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrec7_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrec7.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrec7.mask.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrec7_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrec7.mask.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrec7.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrec7_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrec7.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrec7.mask.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrec7_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrec7.mask.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrec7.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrec7_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrec7.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrec7.mask.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrec7_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrec7.mask.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrec7.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrec7_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrec7.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrec7.mask.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrec7_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrec7.mask.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrec7.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrec7_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrec7.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrec7.mask.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrec7_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrec7.mask.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrec7.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrec7_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrec7.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrec7.mask.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrec7_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x i1> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrec7.mask.nxv32bf16(
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll
new file mode 100644
index 0000000..4626b86
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
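+; vfrsqrt7.v is the 7-bit reciprocal square-root estimate. Unlike vfrec7 it
+; has no rounding-mode operand, so no fsrmi/fsrm sequence appears below; the
+; intrinsics carry only the passthru, the source, the mask (when masked), the
+; AVL, and a policy for the masked form.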
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrsqrt7_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrsqrt7_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrsqrt7_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrsqrt7_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrsqrt7_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrsqrt7_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x i1> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv32bf16(
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll
new file mode 100644
index 0000000..54a6d48
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
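+; vfrsub.vf computes scalar - vector, so only a .vf form exists. Every call
+; here passes rounding mode 7 (dynamic frm), which is why no fsrmi/fsrm
+; save/restore shows up in the checked assembly.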
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrsub_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrsub_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrsub_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrsub_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrsub_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrsub_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrsub_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrsub_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrsub_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrsub_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrsub.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrsub_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsub.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrsub.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrsub_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsub.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll
new file mode 100644
index 0000000..2cd698d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
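+; vfsgnj only injects sign bits and performs no rounding, so these intrinsics
+; have no rounding-mode operand: just the passthru, the two sources, the mask
+; (when masked), the AVL, and a policy for the masked form.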
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnj_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnj_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnj_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnj_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnj_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnj_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnj_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnj_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnj_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnj_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnj_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnj_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll
new file mode 100644
index 0000000..08340be
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
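+
+; A brief descriptive note (not autogenerated): these tests exercise the
+; vfsgnjn (floating-point sign-injection, negated) intrinsics on scalable
+; bfloat vectors under the experimental Zvfbfa extension. vfsgnjn.vv/.vf
+; take the magnitude from the first source operand and the negated sign of
+; the second, covering all LMULs (mf4 through m8) in both unmasked and
+; masked forms.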
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll
new file mode 100644
index 0000000..e51a42e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
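+
+; A brief descriptive note (not autogenerated): these tests exercise the
+; vfsgnjx (floating-point sign-injection, XOR) intrinsics on scalable bfloat
+; vectors under the experimental Zvfbfa extension. The result takes the
+; magnitude of the first source operand and the XOR of the two operands'
+; sign bits, covering all LMULs in unmasked and masked, vector-vector and
+; vector-scalar forms.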
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll
new file mode 100644
index 0000000..c65719c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
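+
+; A brief descriptive note (not autogenerated): these tests exercise the
+; vfslide1down.vf intrinsics on scalable bfloat vectors under the
+; experimental Zvfbfa extension. Each element of the source vector slides
+; down one position and the scalar bfloat operand (fa0) is written to
+; element vl-1, in unmasked and masked forms for all LMULs.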
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfslide1down_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfslide1down_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfslide1down_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfslide1down_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfslide1down_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfslide1down_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll
new file mode 100644
index 0000000..57a4898
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
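+; sed rewrites the iXLen placeholder to i32 or i64, so one file covers both
+; riscv32 and riscv64. The V spec forbids vslide1up's destination from
+; overlapping its source, so each unmasked test below slides into a scratch
+; register group and copies the result back to v8 (the vmv1r.v / vmv.v.v
+; after the slide).
+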
+declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfslide1up_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfslide1up_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfslide1up_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfslide1up_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfslide1up.vf v10, v8, fa0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfslide1up_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfslide1up.vf v12, v8, fa0
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfslide1up_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfslide1up.vf v16, v8, fa0
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll
new file mode 100644
index 0000000..aea7521
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll
@@ -0,0 +1,559 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
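+; vfsub intrinsics carry a rounding-mode operand; iXLen 7 selects dynamic
+; rounding (use the current frm), so no fsrmi/fsrm pair is emitted. The
+; trailing iXLen 1 on the masked variants is the policy operand
+; (tail-agnostic, mask-undisturbed), matching the "ta, mu" in the vsetvli.
+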
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsub_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
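+; In the masked m8 case below, the passthru and minuend take v8 and v16,
+; exhausting the vector argument registers, so the subtrahend is passed
+; indirectly and reloaded with vl8re16.v before the vfsub.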
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsub_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
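+; The .vf variants below take the scalar bf16 operand in fa0; the
+; -target-abi=ilp32d/lp64d in the RUN lines keeps FP scalars in FP argument
+; registers.
+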
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsub_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsub_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsub_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsub_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsub_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsub_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsub_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsub_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsub_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsub_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsub_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsub_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll
new file mode 100644
index 0000000..62feac8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
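+; vfwadd widens two bf16 sources into an f32 result, so the destination
+; register group is twice as wide as the sources. These tests request
+; round-to-nearest-even statically (rounding-mode operand iXLen 0), which is
+; why each one sets frm with fsrmi and restores it with fsrm around the
+; instruction.
+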
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
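+; From m1 upward the widened f32 result is returned in v8, a group that
+; would overlap the incoming bf16 sources in v8/v9, so the sources are first
+; moved to scratch registers (the vmv1r.v/vmv2r.v/vmv4r.v below); the .vf
+; tests later in the file show the same shuffling.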
+define <vscale x 4 x float> @intrinsic_vfwadd_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll
new file mode 100644
index 0000000..c5417e8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll
@@ -0,0 +1,773 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
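+; The .w ("wide operand") form takes an f32 first source and a bf16 second
+; source. Destination and wide source share the same EEW, so the unmasked
+; wv tests can execute in place (vfwadd.wv v8, v8, ...); rounding is again
+; statically RNE (iXLen 0), hence the fsrmi/fsrm pair in every test.
+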
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl4re16.v v24, (a0)
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v16, v24, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
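+; The _tie tests pass the same value as both the passthru and the wide f32
+; operand, so the destination is tied to the wide source and no register
+; move is needed around the vfwadd instruction.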
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
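+; The _untie tests swap the IR argument order so the wide f32 source does
+; not arrive in v8: llc either computes into a scratch register group and
+; copies the result back to v8, or (at larger LMUL) moves the bf16 operand
+; out of the way first.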
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v10, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v12, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll
new file mode 100644
index 0000000..b7df45b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
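+; This file covers the Zvfbfa widening convert from signed i8 to bf16. The
+; intrinsic carries no frm operand (only vl, plus a policy for the masked
+; form), presumably because every i8 value fits exactly in bf16's 8-bit
+; significand; accordingly, no fsrmi/fsrm sequence appears in the checks.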
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i8>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv1bf16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfwcvt.f.x.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv1bf16_nxv1i8(<vscale x 1 x bfloat> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x i8> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i8>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv2bf16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.x.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv2bf16_nxv2i8(<vscale x 2 x bfloat> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x i8> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i8>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv4bf16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.x.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv4bf16_nxv4i8(<vscale x 4 x bfloat> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i8>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv8bf16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i8>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv16bf16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i8>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv32bf16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll
new file mode 100644
index 0000000..c370261
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
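+; Unsigned counterpart of vfwcvt-bf-x.ll: u8 values up to 255 also fit
+; exactly in bf16's 8 significand bits, so these conversions likewise take
+; no frm operand.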
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i8>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv1bf16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv1bf16_nxv1i8(<vscale x 1 x bfloat> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x i8> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i8>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv2bf16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv2bf16_nxv2i8(<vscale x 2 x bfloat> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x i8> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i8>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv4bf16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv4bf16_nxv4i8(<vscale x 4 x bfloat> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i8>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv8bf16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i8>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv16bf16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i8>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv32bf16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll
new file mode 100644
index 0000000..a3f6678
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll
@@ -0,0 +1,506 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
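+; Widening fused multiply-subtract: vd = (bf16 vs1 * bf16 vs2) - f32 vd.
+; Since the f32 destination doubles as the accumulator input, every test
+; passes policy 0 (tail undisturbed), visible as "tu" in the vsetvli lines.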
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v10, v11
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v12, v14
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v16, v20
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll
new file mode 100644
index 0000000..577b93a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
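+; Widening multiply of two bf16 sources into an f32 result, again with an
+; explicit frm operand (0 = round to nearest even) lowered as the
+; fsrmi/fsrm pair.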
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwmul.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwmul.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwmul.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwmul.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll
new file mode 100644
index 0000000..1e05e4c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll
@@ -0,0 +1,506 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
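+; The vfwnmacc intrinsics take the f32 accumulator first, then the bf16
+; multiplier (vector or scalar) and multiplicand, followed by iXLen operands
+; for the rounding mode, vl, and policy. Policy 0 keeps the tail undisturbed
+; ("tu" in the vsetvli), since the accumulator doubles as the destination.
+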
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v10, v11
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v12, v14
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v16, v20
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll
new file mode 100644
index 0000000..223ad4f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll
@@ -0,0 +1,506 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
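+; Same operand convention as the other widening FMA tests in this change:
+; accumulator, bf16 multiplier and multiplicand, then iXLen rounding mode,
+; vl, and policy (0, i.e. tail undisturbed, so the accumulator is preserved).
+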
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v10, v11
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v12, v14
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v16, v20
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll
new file mode 100644
index 0000000..d993e4e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
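+; As with vfwmul above, the unmasked vfwsub intrinsics take a passthru
+; (poison in these tests), the two bf16 sources, and iXLen rounding-mode and
+; vl operands; the masked forms add a mask and a trailing iXLen policy.
+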
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
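+; The .vf variants below subtract a splat of the scalar bf16 in fa0 from the
+; bf16 source vector, widening each result element to f32.
+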
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll
new file mode 100644
index 0000000..b22899a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll
@@ -0,0 +1,773 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
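+; These tests cover the vfwsub.w intrinsics, which subtract a narrow bf16
+; operand from an already-widened f32 vector. Each call passes a static
+; rounding mode of iXLen 0 (round-to-nearest-even), which codegen
+; materializes with the fsrmi/fsrm pair around the vector op.
+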
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl4re16.v v24, (a0)
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v16, v24, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
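+; The .wf variants subtract a splat of the scalar bf16 in fa0 directly from
+; the wide f32 vector.
+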
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
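+; The "tie" tests pass the same value as both the merge operand and the wide
+; source, so the subtract executes in place (vd == vs2) with no extra copy.
+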
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
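+; The "untie" tests take the narrow bf16 operand as the first argument, so
+; the destination is not tied to the wide source register group.
+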
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.wv v8, v10, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.wv v8, v12, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll
new file mode 100644
index 0000000..9bd859b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
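+; These tests cover vmfeq (ordered floating-point equality) on bf16 vectors
+; under zvfbfa; the e16alt element type in the vsetvli selects the alternate
+; (bfloat16) interpretation of 16-bit elements.
+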
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v10
+; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v12
+; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
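+; The .vf variants compare each element against the scalar bf16 in fa0.
+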
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll
new file mode 100644
index 0000000..73946dc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
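+; RVV has no vector-vector vmfge encoding, so these tests expect codegen to
+; swap the operands and emit vmfle.vv instead.
+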
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfge_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfge_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v10, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmfle.vv v0, v10, v8
+; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v12, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmfle.vv v0, v12, v8
+; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll
new file mode 100644
index 0000000..fac324c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
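+; Note: there is no vmfgt.vv encoding, so the vector-vector form is expected
+; to lower to vmflt.vv with the operand order swapped, as the checks below
+; show; the vector-scalar form selects vmfgt.vf directly.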
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v10, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmflt.vv v0, v10, v8
+; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfgt_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v12, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmflt.vv v0, v12, v8
+; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfgt_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll
new file mode 100644
index 0000000..8356b7b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
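+; Note: vmfle has native vv and vf encodings, so no operand swap is needed.
+; In the masked tests the %mask operand is materialized by an unmasked
+; compare first, which is why the incoming v0 is shuffled through a spare
+; register around the masked compare.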
+declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfle_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfle_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfle_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfle_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfle_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfle_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfle_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfle_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll
new file mode 100644
index 0000000..2e1bcc5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
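+; Note: vmflt has native vv and vf encodings, so no operand swap is needed;
+; the masked variants follow the same v0 save/restore pattern as the other
+; compare tests.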
+declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmflt_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmflt_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmflt_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmflt_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmflt_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmflt_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmflt_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmflt_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll
new file mode 100644
index 0000000..283ffc5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
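+; These tests exercise the llvm.riscv.vmfne intrinsics on scalable bfloat
+; vectors: unmasked and masked, vector-vector (vv) and vector-scalar (vf)
+; forms, at LMULs mf4 through m4.
+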
+declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfne_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfne_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfne_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfne_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/X86/combine-umax.ll b/llvm/test/CodeGen/X86/combine-umax.ll
index 25f8ec8..482b4fc 100644
--- a/llvm/test/CodeGen/X86/combine-umax.ll
+++ b/llvm/test/CodeGen/X86/combine-umax.ll
@@ -60,7 +60,7 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_demandedbits:
; SSE2: # %bb.0:
-; SSE2-NEXT: pmaxub %xmm1, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll
index 76dbcb5..e2757d0 100644
--- a/llvm/test/CodeGen/X86/combine-umin.ll
+++ b/llvm/test/CodeGen/X86/combine-umin.ll
@@ -77,7 +77,7 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_demandedbits:
; SSE2: # %bb.0:
-; SSE2-NEXT: pminub %xmm1, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll
index ac932d5..1a63515 100644
--- a/llvm/test/CodeGen/X86/vector-compress.ll
+++ b/llvm/test/CodeGen/X86/vector-compress.ll
@@ -1090,7 +1090,6 @@ define <16 x i8> @test_compress_v16i8(<16 x i8> %vec, <16 x i1> %mask, <16 x i8>
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vmovaps %xmm2, -{{[0-9]+}}(%rsp)
@@ -1335,7 +1334,6 @@ define <32 x i8> @test_compress_v32i8(<32 x i8> %vec, <32 x i1> %mask, <32 x i8>
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $64, %rsp
; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm3, %ymm3
; AVX2-NEXT: vmovaps %ymm2, (%rsp)
@@ -4733,7 +4731,6 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpextrb $0, %xmm0, -{{[0-9]+}}(%rsp)
@@ -4751,72 +4748,7 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) nounwind {
; AVX2-NEXT: vpextrb $3, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: vpextrb $4, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $4, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: vpextrb $5, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $5, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $6, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $6, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $7, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $7, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $8, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $8, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $9, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $9, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $10, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $10, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $11, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $11, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $12, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $12, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $13, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $13, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $14, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addl %ecx, %eax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $14, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $15, %xmm0, -24(%rsp,%rax)
+; AVX2-NEXT: vpextrb $15, %xmm0, -24(%rsp,%rcx)
; AVX2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index 1c5be03..ac330a7 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -151,23 +151,19 @@ define <32 x i8> @PR22706(<32 x i1> %x) {
; AVX1: ## %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR22706:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
diff --git a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll
index 6732efc..dbd572d 100644
--- a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll
@@ -111,7 +111,6 @@ define void @test_sub_cmp(ptr align 8 %start, ptr %end) {
; N32-NEXT: [[CMP_ENTRY:%.*]] = icmp eq ptr [[START]], [[END]]
; N32-NEXT: br i1 [[CMP_ENTRY]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]]
; N32: [[LOOP_HEADER_PREHEADER]]:
-; N32-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[PTR_DIFF]], i64 1)
; N32-NEXT: br label %[[LOOP_HEADER:.*]]
; N32: [[LOOP_HEADER]]:
; N32-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ]
@@ -119,7 +118,7 @@ define void @test_sub_cmp(ptr align 8 %start, ptr %end) {
; N32-NEXT: br i1 [[C_1]], label %[[EXIT_EARLY:.*]], label %[[LOOP_LATCH]]
; N32: [[LOOP_LATCH]]:
; N32-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
-; N32-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UMAX]]
+; N32-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[PTR_DIFF]]
; N32-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT_LOOPEXIT:.*]]
; N32: [[EXIT_EARLY]]:
; N32-NEXT: br label %[[EXIT]]
@@ -162,13 +161,17 @@ define void @test_ptr_diff_with_assume(ptr align 8 %start, ptr align 8 %end, ptr
; CHECK-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]]
; CHECK-NEXT: [[DIFF_CMP:%.*]] = icmp ult i64 [[PTR_DIFF]], 2
; CHECK-NEXT: call void @llvm.assume(i1 [[DIFF_CMP]])
+; CHECK-NEXT: [[COMPUTED_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[PTR_DIFF]]
; CHECK-NEXT: [[ENTRY_CMP:%.*]] = icmp eq ptr [[START]], [[END]]
; CHECK-NEXT: br i1 [[ENTRY_CMP]], label %[[EXIT:.*]], label %[[LOOP_BODY_PREHEADER:.*]]
; CHECK: [[LOOP_BODY_PREHEADER]]:
; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
; CHECK: [[LOOP_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ], [ [[START]], %[[LOOP_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @cond()
-; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
+; CHECK-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 1
+; CHECK-NEXT: [[LOOP_CMP:%.*]] = icmp eq ptr [[IV_NEXT]], [[COMPUTED_END]]
+; CHECK-NEXT: br i1 [[LOOP_CMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
@@ -182,13 +185,17 @@ define void @test_ptr_diff_with_assume(ptr align 8 %start, ptr align 8 %end, ptr
; N32-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]]
; N32-NEXT: [[DIFF_CMP:%.*]] = icmp ult i64 [[PTR_DIFF]], 2
; N32-NEXT: call void @llvm.assume(i1 [[DIFF_CMP]])
+; N32-NEXT: [[COMPUTED_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[PTR_DIFF]]
; N32-NEXT: [[ENTRY_CMP:%.*]] = icmp eq ptr [[START]], [[END]]
; N32-NEXT: br i1 [[ENTRY_CMP]], label %[[EXIT:.*]], label %[[LOOP_BODY_PREHEADER:.*]]
; N32: [[LOOP_BODY_PREHEADER]]:
; N32-NEXT: br label %[[LOOP_BODY:.*]]
; N32: [[LOOP_BODY]]:
+; N32-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ], [ [[START]], %[[LOOP_BODY_PREHEADER]] ]
; N32-NEXT: [[TMP0:%.*]] = call i1 @cond()
-; N32-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
+; N32-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 1
+; N32-NEXT: [[LOOP_CMP:%.*]] = icmp eq ptr [[IV_NEXT]], [[COMPUTED_END]]
+; N32-NEXT: br i1 [[LOOP_CMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
; N32: [[EXIT_LOOPEXIT]]:
; N32-NEXT: br label %[[EXIT]]
; N32: [[EXIT]]:
diff --git a/llvm/test/Transforms/InstCombine/scmp.ll b/llvm/test/Transforms/InstCombine/scmp.ll
index 2bf22ae..c0be5b9 100644
--- a/llvm/test/Transforms/InstCombine/scmp.ll
+++ b/llvm/test/Transforms/InstCombine/scmp.ll
@@ -423,6 +423,86 @@ define i8 @scmp_from_select_eq_and_gt_commuted3(i32 %x, i32 %y) {
ret i8 %r
}
+; Commutative tests for (x != y) ? (x > y ? 1 : -1) : 0
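+; scmp(x, y) is -1 when x < y, 0 when x == y, and 1 when x > y, so each
+; commuted form of this select chain should fold to one llvm.scmp call.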
+define i8 @scmp_from_select_ne_and_gt_commuted1(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_ne_and_gt_commuted1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[Y]], i32 [[X]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %ne = icmp ne i32 %x, %y
+ %gt = icmp slt i32 %x, %y
+ %sel1 = select i1 %gt, i8 1, i8 -1
+ %r = select i1 %ne, i8 %sel1, i8 0
+ ret i8 %r
+}
+
+define i8 @scmp_from_select_ne_and_gt_commuted2(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_ne_and_gt_commuted2(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[Y]], i32 [[X]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %ne = icmp ne i32 %x, %y
+ %gt = icmp sgt i32 %x, %y
+ %sel1 = select i1 %gt, i8 -1, i8 1
+ %r = select i1 %ne, i8 %sel1, i8 0
+ ret i8 %r
+}
+
+define i8 @scmp_from_select_ne_and_gt_commuted3(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_ne_and_gt_commuted3(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %ne = icmp ne i32 %x, %y
+ %gt = icmp sgt i32 %x, %y
+ %sel1 = select i1 %gt, i8 1, i8 -1
+ %r = select i1 %ne, i8 %sel1, i8 0
+ ret i8 %r
+}
+
+; Commutative tests for x != C ? (x > C - 1 ? 1 : -1) : 0
+define i8 @scmp_from_select_ne_const_and_gt_commuted1(i32 %x) {
+; CHECK-LABEL: define i8 @scmp_from_select_ne_const_and_gt_commuted1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 5)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %ne = icmp ne i32 %x, 5
+ %gt = icmp sgt i32 %x, 4
+ %sel1 = select i1 %gt, i8 1, i8 -1
+ %r = select i1 %ne, i8 %sel1, i8 0
+ ret i8 %r
+}
+
+define i8 @scmp_from_select_ne_const_and_gt_commuted2(i32 %x) {
+; CHECK-LABEL: define i8 @scmp_from_select_ne_const_and_gt_commuted2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 5)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %ne = icmp ne i32 %x, 5
+ %gt = icmp sgt i32 %x, 4
+ %sel1 = select i1 %gt, i8 1, i8 -1
+ %r = select i1 %ne, i8 %sel1, i8 0
+ ret i8 %r
+}
+
+define i8 @scmp_from_select_ne_const_and_gt_commuted3(i32 %x) {
+; CHECK-LABEL: define i8 @scmp_from_select_ne_const_and_gt_commuted3(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 5)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %ne = icmp ne i32 %x, 5
+ %gt = icmp sgt i32 %x, 4
+ %sel1 = select i1 %gt, i8 1, i8 -1
+ %r = select i1 %ne, i8 %sel1, i8 0
+ ret i8 %r
+}
+
define <3 x i2> @scmp_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: define <3 x i2> @scmp_unary_shuffle_ops(
; CHECK-SAME: <3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) {
@@ -436,6 +516,187 @@ define <3 x i2> @scmp_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) {
ret <3 x i2> %r
}
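+; scmp/ucmp formation from icmp+select chains against zero or a second
+; operand; the _neg variants use constants that break the pattern, so
+; they must not fold to a cmp intrinsic.
+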
+define i32 @scmp_sgt_slt(i32 %a) {
+; CHECK-LABEL: define i32 @scmp_sgt_slt(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr i32 [[A]], 31
+; CHECK-NEXT: [[CMP_INV:%.*]] = icmp slt i32 [[A]], 1
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[CMP_INV]], i32 [[A_LOBIT]], i32 1
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %cmp = icmp sgt i32 %a, 0
+ %cmp1 = icmp slt i32 %a, 0
+ %. = select i1 %cmp1, i32 -1, i32 0
+ %retval.0 = select i1 %cmp, i32 1, i32 %.
+ ret i32 %retval.0
+}
+
+define i32 @scmp_zero_slt(i32 %a) {
+; CHECK-LABEL: define i32 @scmp_zero_slt(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0)
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %cmp1.inv = icmp slt i32 %a, 1
+ %. = select i1 %cmp1.inv, i32 -1, i32 1
+ %retval.0 = select i1 %cmp, i32 0, i32 %.
+ ret i32 %retval.0
+}
+
+define i32 @scmp_zero_sgt(i32 %a) {
+; CHECK-LABEL: define i32 @scmp_zero_sgt(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0)
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %cmp1.inv = icmp sgt i32 %a, -1
+ %. = select i1 %cmp1.inv, i32 1, i32 -1
+ %retval.0 = select i1 %cmp, i32 0, i32 %.
+ ret i32 %retval.0
+}
+
+define i32 @scmp_zero_sgt_1(i32 %a) {
+; CHECK-LABEL: define i32 @scmp_zero_sgt_1(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[COND2:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0)
+; CHECK-NEXT: ret i32 [[COND2]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %cmp1 = icmp sgt i32 %a, -1
+ %cond = select i1 %cmp1, i32 1, i32 -1
+ %cond2 = select i1 %cmp, i32 0, i32 %cond
+ ret i32 %cond2
+}
+
+define i32 @scmp_zero_slt_1(i32 %a) {
+; CHECK-LABEL: define i32 @scmp_zero_slt_1(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[COND2:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0)
+; CHECK-NEXT: ret i32 [[COND2]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %cmp1 = icmp slt i32 %a, 1
+ %cond = select i1 %cmp1, i32 -1, i32 1
+ %cond2 = select i1 %cmp, i32 0, i32 %cond
+ ret i32 %cond2
+}
+
+define i32 @scmp_zero_slt_neg(i32 %a) {
+; CHECK-LABEL: define i32 @scmp_zero_slt_neg(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[A]], -1
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP1]], i32 -1, i32 1
+; CHECK-NEXT: [[COND2:%.*]] = select i1 [[CMP]], i32 0, i32 [[COND]]
+; CHECK-NEXT: ret i32 [[COND2]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %cmp1 = icmp slt i32 %a, -1
+ %cond = select i1 %cmp1, i32 -1, i32 1
+ %cond2 = select i1 %cmp, i32 0, i32 %cond
+ ret i32 %cond2
+}
+
+define i32 @scmp_zero_sgt_neg(i32 %a) {
+; CHECK-LABEL: define i32 @scmp_zero_sgt_neg(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 1
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP1]], i32 1, i32 -1
+; CHECK-NEXT: [[COND2:%.*]] = select i1 [[CMP]], i32 0, i32 [[COND]]
+; CHECK-NEXT: ret i32 [[COND2]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %cmp1 = icmp sgt i32 %a, 1
+ %cond = select i1 %cmp1, i32 1, i32 -1
+ %cond2 = select i1 %cmp, i32 0, i32 %cond
+ ret i32 %cond2
+}
+
+define i32 @ucmp_ugt_ult_neg(i32 %a) {
+; CHECK-LABEL: define i32 @ucmp_ugt_ult_neg(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp ne i32 [[A]], 0
+; CHECK-NEXT: [[RETVAL_0:%.*]] = zext i1 [[CMP_NOT]] to i32
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %cmp = icmp ugt i32 %a, 0
+ %cmp1 = icmp ult i32 %a, 0
+ %. = select i1 %cmp1, i32 -1, i32 0
+ %retval.0 = select i1 %cmp, i32 1, i32 %.
+ ret i32 %retval.0
+}
+
+define i32 @ucmp_zero_ult_neg(i32 %a) {
+; CHECK-LABEL: define i32 @ucmp_zero_ult_neg(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A]], 0
+; CHECK-NEXT: [[RETVAL_0:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %cmp1.inv = icmp ult i32 %a, 1
+ %. = select i1 %cmp1.inv, i32 -1, i32 1
+ %retval.0 = select i1 %cmp, i32 0, i32 %.
+ ret i32 %retval.0
+}
+
+define i32 @ucmp_zero_ugt_neg(i32 %a) {
+; CHECK-LABEL: define i32 @ucmp_zero_ugt_neg(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A]], 0
+; CHECK-NEXT: [[RETVAL_0:%.*]] = sext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %cmp = icmp eq i32 %a, 0
+ %cmp1.inv = icmp ugt i32 %a, -1
+ %. = select i1 %cmp1.inv, i32 1, i32 -1
+ %retval.0 = select i1 %cmp, i32 0, i32 %.
+ ret i32 %retval.0
+}
+
+define i32 @scmp_sgt_slt_ab(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @scmp_sgt_slt_ab(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %cmp = icmp sgt i32 %a, %b
+ %cmp1 = icmp slt i32 %a, %b
+ %. = select i1 %cmp1, i32 -1, i32 0
+ %retval.0 = select i1 %cmp, i32 1, i32 %.
+ ret i32 %retval.0
+}
+
+define i32 @scmp_zero_slt_ab(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @scmp_zero_slt_ab(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %cmp = icmp eq i32 %a, %b
+ %cmp1.inv = icmp slt i32 %a, %b
+ %. = select i1 %cmp1.inv, i32 -1, i32 1
+ %retval.0 = select i1 %cmp, i32 0, i32 %.
+ ret i32 %retval.0
+}
+
+define i32 @scmp_zero_sgt_ab(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @scmp_zero_sgt_ab(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %cmp = icmp eq i32 %a, %b
+ %cmp1.inv = icmp sgt i32 %a, %b
+ %. = select i1 %cmp1.inv, i32 1, i32 -1
+ %retval.0 = select i1 %cmp, i32 0, i32 %.
+ ret i32 %retval.0
+}
+
; Negative test: true value of outer select is not zero
define i8 @scmp_from_select_eq_and_gt_neg1(i32 %x, i32 %y) {
; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt_neg1(
diff --git a/llvm/test/Transforms/SCCP/constant-range-struct.ll b/llvm/test/Transforms/SCCP/constant-range-struct.ll
index 7a399df..0f45b38 100644
--- a/llvm/test/Transforms/SCCP/constant-range-struct.ll
+++ b/llvm/test/Transforms/SCCP/constant-range-struct.ll
@@ -25,7 +25,7 @@ true:
br label %exit
false:
- %s.3 = insertvalue {i64, i64} undef, i64 30, 0
+ %s.3 = insertvalue {i64, i64} poison, i64 30, 0
%s.4 = insertvalue {i64, i64} %s.3, i64 300, 1
br label %exit
@@ -39,14 +39,14 @@ define void @struct1_caller() {
; CHECK-NEXT: [[S:%.*]] = call { i64, i64 } @struct1()
; CHECK-NEXT: [[V1:%.*]] = extractvalue { i64, i64 } [[S]], 0
; CHECK-NEXT: [[V2:%.*]] = extractvalue { i64, i64 } [[S]], 1
-; CHECK-NEXT: [[T_1:%.*]] = icmp ne i64 [[V1]], 10
-; CHECK-NEXT: call void @use(i1 [[T_1]])
-; CHECK-NEXT: [[T_2:%.*]] = icmp ult i64 [[V1]], 100
-; CHECK-NEXT: call void @use(i1 [[T_2]])
-; CHECK-NEXT: [[T_3:%.*]] = icmp ne i64 [[V2]], 0
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: [[T_3:%.*]] = icmp eq i64 [[V1]], 20
; CHECK-NEXT: call void @use(i1 [[T_3]])
-; CHECK-NEXT: [[T_4:%.*]] = icmp ult i64 [[V2]], 301
-; CHECK-NEXT: call void @use(i1 [[T_4]])
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: [[T_6:%.*]] = icmp eq i64 [[V2]], 300
+; CHECK-NEXT: call void @use(i1 [[T_6]])
; CHECK-NEXT: ret void
;
%s = call {i64, i64} @struct1()
@@ -57,10 +57,14 @@ define void @struct1_caller() {
call void @use(i1 %t.1)
%t.2 = icmp ult i64 %v1, 100
call void @use(i1 %t.2)
- %t.3 = icmp ne i64 %v2, 0
+ %t.3 = icmp eq i64 %v1, 20
call void @use(i1 %t.3)
- %t.4 = icmp ult i64 %v2, 301
+ %t.4 = icmp ne i64 %v2, 0
call void @use(i1 %t.4)
+ %t.5 = icmp ult i64 %v2, 301
+ call void @use(i1 %t.5)
+ %t.6 = icmp eq i64 %v2, 300
+ call void @use(i1 %t.6)
ret void
}
@@ -76,7 +80,7 @@ define internal {i64, i64} @struct2() {
; CHECK: exit:
; CHECK-NEXT: [[V1:%.*]] = phi i64 [ 20, [[TRUE]] ], [ 30, [[FALSE]] ]
; CHECK-NEXT: [[V2:%.*]] = phi i64 [ 200, [[TRUE]] ], [ 300, [[FALSE]] ]
-; CHECK-NEXT: [[S_1:%.*]] = insertvalue { i64, i64 } undef, i64 [[V1]], 0
+; CHECK-NEXT: [[S_1:%.*]] = insertvalue { i64, i64 } poison, i64 [[V1]], 0
; CHECK-NEXT: [[S_2:%.*]] = insertvalue { i64, i64 } [[S_1]], i64 [[V2]], 1
; CHECK-NEXT: ret { i64, i64 } [[S_2]]
;
@@ -92,7 +96,7 @@ false:
exit:
%v1 = phi i64 [ 20, %true ], [ 30, %false ]
%v2 = phi i64 [ 200, %true ], [ 300, %false ]
- %s.1 = insertvalue {i64, i64} undef, i64 %v1, 0
+ %s.1 = insertvalue {i64, i64} poison, i64 %v1, 0
%s.2 = insertvalue {i64, i64} %s.1, i64 %v2, 1
ret {i64, i64} %s.2
}
@@ -153,3 +157,40 @@ define void @struct2_caller() {
ret void
}
+
+%"phi_type" = type {i64, i64}
+
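+; IPSCCP tracks struct values through the phi: the only call site passes 1,
+; so the return of @test resolves to { i64 1, i64 2 } at the call site in
+; @test2 and the internal body is reduced to returning poison.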
+define internal %"phi_type" @test(i32 %input) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: br label [[COND_TRUE_I:%.*]]
+; CHECK: cond.true.i:
+; CHECK-NEXT: br label [[COND_END_I:%.*]]
+; CHECK: cond.end.i:
+; CHECK-NEXT: ret [[PHI_TYPE:%.*]] poison
+;
+ %cmp.cond = icmp eq i32 %input, 1
+ br i1 %cmp.cond, label %cond.true.i, label %cond.false.i
+
+cond.true.i:
+ %r1.tmp = insertvalue %"phi_type" poison, i64 1, 0
+ %r1.tmp.2 = insertvalue %"phi_type" %r1.tmp, i64 2, 1
+ br label %cond.end.i
+
+cond.false.i:
+ %r2.tmp = insertvalue %"phi_type" poison, i64 3, 0
+ %r2.tmp.2 = insertvalue %"phi_type" %r2.tmp, i64 4, 1
+ br label %cond.end.i
+
+cond.end.i:
+ %retval = phi %"phi_type" [ %r1.tmp.2, %cond.true.i ], [ %r2.tmp.2, %cond.false.i ]
+ ret %"phi_type" %retval
+}
+
+define %"phi_type" @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[CALL_1:%.*]] = tail call fastcc [[PHI_TYPE:%.*]] @[[TEST:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 noundef 1)
+; CHECK-NEXT: ret [[PHI_TYPE]] { i64 1, i64 2 }
+;
+ %call.1 = tail call fastcc noundef %"phi_type" @test(i32 noundef 1)
+ ret %"phi_type" %call.1
+}
diff --git a/llvm/test/tools/llvm-reduce/reduce-instructions-alloca.ll b/llvm/test/tools/llvm-reduce/reduce-instructions-alloca.ll
new file mode 100644
index 0000000..94b45d2
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-instructions-alloca.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions --test FileCheck --test-arg --check-prefixes=CHECK,INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck -check-prefixes=CHECK,RESULT %s < %t
+
+; CHECK-LABEL: define void @alloca(
+; INTERESTING: call void @llvm.lifetime.start.p0(
+; INTERESTING: call void @llvm.lifetime.end.p0(
+
+; RESULT: call void @llvm.lifetime.start.p0(ptr poison)
+; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr poison)
+; RESULT-NEXT: ret void
+define void @alloca(ptr %ptr) {
+ %alloca = alloca i32, align 4
+ call void @llvm.lifetime.start.p0(ptr %alloca)
+ call void @llvm.lifetime.end.p0(ptr %alloca)
+ ret void
+}
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp
index f1f5d6b..19b69e8 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp
@@ -13,6 +13,8 @@
#include "ReduceInstructions.h"
#include "Utils.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
using namespace llvm;
@@ -37,7 +39,9 @@ void llvm::reduceInstructionsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) {
for (auto &Inst :
make_early_inc_range(make_range(BB.begin(), std::prev(BB.end())))) {
if (!shouldAlwaysKeep(Inst) && !O.shouldKeep()) {
- Inst.replaceAllUsesWith(getDefaultValue(Inst.getType()));
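+ // Replace allocas with poison instead of the default value: the IR
+ // verifier only accepts an alloca (or poison) as the pointer operand of
+ // llvm.lifetime intrinsics, so any other replacement would turn a valid
+ // reduction into a verifier failure.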
+ Inst.replaceAllUsesWith(isa<AllocaInst>(Inst)
+ ? PoisonValue::get(Inst.getType())
+ : getDefaultValue(Inst.getType()));
Inst.eraseFromParent();
}
}
diff --git a/llvm/unittests/ADT/BitTest.cpp b/llvm/unittests/ADT/BitTest.cpp
index eaed4e1..5b3df91 100644
--- a/llvm/unittests/ADT/BitTest.cpp
+++ b/llvm/unittests/ADT/BitTest.cpp
@@ -270,6 +270,22 @@ TEST(BitTest, BitWidthConstexpr) {
llvm::bit_width_constexpr(std::numeric_limits<uint64_t>::max()) == 64);
}
+TEST(BitTest, BitCeilConstexpr) {
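+ // bit_ceil_constexpr(N) is the smallest power of two >= N; as with
+ // std::bit_ceil, an input of 0 yields 1.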
+ static_assert(llvm::bit_ceil_constexpr(0u) == 1);
+ static_assert(llvm::bit_ceil_constexpr(1u) == 1);
+ static_assert(llvm::bit_ceil_constexpr(2u) == 2);
+ static_assert(llvm::bit_ceil_constexpr(3u) == 4);
+ static_assert(llvm::bit_ceil_constexpr(4u) == 4);
+ static_assert(llvm::bit_ceil_constexpr(5u) == 8);
+ static_assert(llvm::bit_ceil_constexpr(6u) == 8);
+ static_assert(llvm::bit_ceil_constexpr(7u) == 8);
+ static_assert(llvm::bit_ceil_constexpr(8u) == 8);
+
+ static_assert(llvm::bit_ceil_constexpr(255u) == 256);
+ static_assert(llvm::bit_ceil_constexpr(256u) == 256);
+ static_assert(llvm::bit_ceil_constexpr(257u) == 512);
+}
+
TEST(BitTest, CountlZero) {
uint8_t Z8 = 0;
uint16_t Z16 = 0;