Diffstat (limited to 'llvm')
98 files changed, 23148 insertions, 649 deletions
diff --git a/llvm/benchmarks/SpecialCaseListBM.cpp b/llvm/benchmarks/SpecialCaseListBM.cpp index 00aa3cd..b5d8268 100644 --- a/llvm/benchmarks/SpecialCaseListBM.cpp +++ b/llvm/benchmarks/SpecialCaseListBM.cpp @@ -110,6 +110,26 @@ std::string genGlobAtBothSides(const std::vector<std::string> &Files) { return S; } +std::string genGlobAtBothSidesAndMid(const std::vector<std::string> &Files) { + std::string S; + std::minstd_rand Rng(RNG_SEED); + for (std::string F : Files) { + std::uniform_int_distribution<> PosDistrib(0, F.size() - 1); + F[PosDistrib(Rng)] = '*'; + + std::uniform_int_distribution<> Ends(0, 1); + if (Ends(Rng)) { + F.back() = '*'; + F.front() = '*'; + } + + S += "src:"; + S += F; + S += "\n"; + } + return S; +} + void BM_Make_( benchmark::State &state, std::string (*GenerateCaseList)(const std::vector<std::string> &Files)) { @@ -171,6 +191,9 @@ BENCHMARK_CAPTURE(BM_Make_, Mid__, genGlobInMid) BENCHMARK_CAPTURE(BM_Make_, Both_, genGlobAtBothSides) ->RangeMultiplier(MAX_LIST_MUL) ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_Make_, Mix__, genGlobAtBothSidesAndMid) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); BENCHMARK_CAPTURE(BM_True_, None_, genGlobNone) ->RangeMultiplier(MAX_LIST_MUL) @@ -187,6 +210,9 @@ BENCHMARK_CAPTURE(BM_True_, Mid__, genGlobInMid) BENCHMARK_CAPTURE(BM_True_, Both_, genGlobAtBothSides) ->RangeMultiplier(MAX_LIST_MUL) ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_True_, Mix__, genGlobAtBothSidesAndMid) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); BENCHMARK_CAPTURE(BM_False, None_, genGlobNone) ->RangeMultiplier(MAX_LIST_MUL) @@ -203,5 +229,8 @@ BENCHMARK_CAPTURE(BM_False, Mid__, genGlobInMid) BENCHMARK_CAPTURE(BM_False, Both_, genGlobAtBothSides) ->RangeMultiplier(MAX_LIST_MUL) ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_False, Mix__, genGlobAtBothSidesAndMid) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); BENCHMARK_MAIN(); diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index a1bfce7..bccdb89 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -138,10 +138,16 @@ enum lostFraction { // Example of truncated bits: /// New operations: sqrt, IEEE remainder, C90 fmod, nexttoward. /// +namespace detail { +class IEEEFloat; +class DoubleAPFloat; +} // namespace detail + // This is the common type definitions shared by APFloat and its internal // implementation classes. This struct should not define any non-static data // members. 
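// Illustrative sketch (not part of the patch): with the accessors defined
// inline below, existing callers compile unchanged, e.g.
//   const llvm::fltSemantics &Sem = llvm::APFloat::IEEEdouble();
//   llvm::APFloat One(Sem, "1.0"); // hypothetical usage, not from the patch
// APFloat inherits these accessors from APFloatBase; only their definitions
// move from APFloat.cpp into the header.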
-struct APFloatBase { +class APFloatBase { +public: typedef APInt::WordType integerPart; static constexpr unsigned integerPartWidth = APInt::APINT_BITS_PER_WORD; @@ -257,30 +263,64 @@ struct APFloatBase { LLVM_ABI static const llvm::fltSemantics &EnumToSemantics(Semantics S); LLVM_ABI static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem); - LLVM_ABI static const fltSemantics &IEEEhalf() LLVM_READNONE; - LLVM_ABI static const fltSemantics &BFloat() LLVM_READNONE; - LLVM_ABI static const fltSemantics &IEEEsingle() LLVM_READNONE; - LLVM_ABI static const fltSemantics &IEEEdouble() LLVM_READNONE; - LLVM_ABI static const fltSemantics &IEEEquad() LLVM_READNONE; - LLVM_ABI static const fltSemantics &PPCDoubleDouble() LLVM_READNONE; - LLVM_ABI static const fltSemantics &PPCDoubleDoubleLegacy() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float8E5M2() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float8E5M2FNUZ() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float8E4M3() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float8E4M3FN() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float8E4M3FNUZ() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float8E3M4() LLVM_READNONE; - LLVM_ABI static const fltSemantics &FloatTF32() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float8E8M0FNU() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float6E3M2FN() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float6E2M3FN() LLVM_READNONE; - LLVM_ABI static const fltSemantics &Float4E2M1FN() LLVM_READNONE; - LLVM_ABI static const fltSemantics &x87DoubleExtended() LLVM_READNONE; +private: + LLVM_ABI static const fltSemantics semIEEEhalf; + LLVM_ABI static const fltSemantics semBFloat; + LLVM_ABI static const fltSemantics semIEEEsingle; + LLVM_ABI static const fltSemantics semIEEEdouble; + LLVM_ABI static const fltSemantics semIEEEquad; + LLVM_ABI static const fltSemantics semFloat8E5M2; + LLVM_ABI static const fltSemantics semFloat8E5M2FNUZ; + LLVM_ABI static const fltSemantics semFloat8E4M3; + LLVM_ABI static const fltSemantics semFloat8E4M3FN; + LLVM_ABI static const fltSemantics semFloat8E4M3FNUZ; + LLVM_ABI static const fltSemantics semFloat8E4M3B11FNUZ; + LLVM_ABI static const fltSemantics semFloat8E3M4; + LLVM_ABI static const fltSemantics semFloatTF32; + LLVM_ABI static const fltSemantics semFloat8E8M0FNU; + LLVM_ABI static const fltSemantics semFloat6E3M2FN; + LLVM_ABI static const fltSemantics semFloat6E2M3FN; + LLVM_ABI static const fltSemantics semFloat4E2M1FN; + LLVM_ABI static const fltSemantics semX87DoubleExtended; + LLVM_ABI static const fltSemantics semBogus; + LLVM_ABI static const fltSemantics semPPCDoubleDouble; + LLVM_ABI static const fltSemantics semPPCDoubleDoubleLegacy; + + friend class detail::IEEEFloat; + friend class detail::DoubleAPFloat; + friend class APFloat; + +public: + static const fltSemantics &IEEEhalf() { return semIEEEhalf; } + static const fltSemantics &BFloat() { return semBFloat; } + static const fltSemantics &IEEEsingle() { return semIEEEsingle; } + static const fltSemantics &IEEEdouble() { return semIEEEdouble; } + static const fltSemantics &IEEEquad() { return semIEEEquad; } + static const fltSemantics &PPCDoubleDouble() { return semPPCDoubleDouble; } + static const fltSemantics &PPCDoubleDoubleLegacy() { + return semPPCDoubleDoubleLegacy; + } + static const fltSemantics &Float8E5M2() { return semFloat8E5M2; } + static const fltSemantics 
&Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } + static const fltSemantics &Float8E4M3() { return semFloat8E4M3; } + static const fltSemantics &Float8E4M3FN() { return semFloat8E4M3FN; } + static const fltSemantics &Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } + static const fltSemantics &Float8E4M3B11FNUZ() { + return semFloat8E4M3B11FNUZ; + } + static const fltSemantics &Float8E3M4() { return semFloat8E3M4; } + static const fltSemantics &FloatTF32() { return semFloatTF32; } + static const fltSemantics &Float8E8M0FNU() { return semFloat8E8M0FNU; } + static const fltSemantics &Float6E3M2FN() { return semFloat6E3M2FN; } + static const fltSemantics &Float6E2M3FN() { return semFloat6E2M3FN; } + static const fltSemantics &Float4E2M1FN() { return semFloat4E2M1FN; } + static const fltSemantics &x87DoubleExtended() { + return semX87DoubleExtended; + } /// A Pseudo fltsemantic used to construct APFloats that cannot conflict with /// anything real. - LLVM_ABI static const fltSemantics &Bogus() LLVM_READNONE; + static const fltSemantics &Bogus() { return semBogus; } // Returns true if any number described by this semantics can be precisely // represented by the specified semantics. Does not take into account @@ -927,69 +967,11 @@ class APFloat : public APFloatBase { llvm_unreachable("Unexpected semantics"); } - ~Storage() { - if (usesLayout<IEEEFloat>(*semantics)) { - IEEE.~IEEEFloat(); - return; - } - if (usesLayout<DoubleAPFloat>(*semantics)) { - Double.~DoubleAPFloat(); - return; - } - llvm_unreachable("Unexpected semantics"); - } - - Storage(const Storage &RHS) { - if (usesLayout<IEEEFloat>(*RHS.semantics)) { - new (this) IEEEFloat(RHS.IEEE); - return; - } - if (usesLayout<DoubleAPFloat>(*RHS.semantics)) { - new (this) DoubleAPFloat(RHS.Double); - return; - } - llvm_unreachable("Unexpected semantics"); - } - - Storage(Storage &&RHS) { - if (usesLayout<IEEEFloat>(*RHS.semantics)) { - new (this) IEEEFloat(std::move(RHS.IEEE)); - return; - } - if (usesLayout<DoubleAPFloat>(*RHS.semantics)) { - new (this) DoubleAPFloat(std::move(RHS.Double)); - return; - } - llvm_unreachable("Unexpected semantics"); - } - - Storage &operator=(const Storage &RHS) { - if (usesLayout<IEEEFloat>(*semantics) && - usesLayout<IEEEFloat>(*RHS.semantics)) { - IEEE = RHS.IEEE; - } else if (usesLayout<DoubleAPFloat>(*semantics) && - usesLayout<DoubleAPFloat>(*RHS.semantics)) { - Double = RHS.Double; - } else if (this != &RHS) { - this->~Storage(); - new (this) Storage(RHS); - } - return *this; - } - - Storage &operator=(Storage &&RHS) { - if (usesLayout<IEEEFloat>(*semantics) && - usesLayout<IEEEFloat>(*RHS.semantics)) { - IEEE = std::move(RHS.IEEE); - } else if (usesLayout<DoubleAPFloat>(*semantics) && - usesLayout<DoubleAPFloat>(*RHS.semantics)) { - Double = std::move(RHS.Double); - } else if (this != &RHS) { - this->~Storage(); - new (this) Storage(std::move(RHS)); - } - return *this; - } + LLVM_ABI ~Storage(); + LLVM_ABI Storage(const Storage &RHS); + LLVM_ABI Storage(Storage &&RHS); + LLVM_ABI Storage &operator=(const Storage &RHS); + LLVM_ABI Storage &operator=(Storage &&RHS); } U; template <typename T> static bool usesLayout(const fltSemantics &Semantics) { diff --git a/llvm/include/llvm/ADT/Bitfields.h b/llvm/include/llvm/ADT/Bitfields.h index 1af2761..1fbc41c 100644 --- a/llvm/include/llvm/ADT/Bitfields.h +++ b/llvm/include/llvm/ADT/Bitfields.h @@ -154,12 +154,9 @@ struct ResolveUnderlyingType { using type = std::underlying_type_t<T>; }; template <typename T> struct ResolveUnderlyingType<T, false> { - using type = 
T; -}; -template <> struct ResolveUnderlyingType<bool, false> { - /// In case sizeof(bool) != 1, replace `void` by an additionnal - /// std::conditional. - using type = std::conditional_t<sizeof(bool) == 1, uint8_t, void>; + static_assert(!std::is_same_v<T, bool> || sizeof(bool) == 1, + "T being bool requires sizeof(bool) == 1."); + using type = std::conditional_t<std::is_same_v<T, bool>, uint8_t, T>; }; } // namespace bitfields_details diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index 4bda50f..25b5262 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -42,7 +42,7 @@ namespace detail { // We extend a pair to allow users to override the bucket type with their own // implementation without requiring two members. template <typename KeyT, typename ValueT> -struct DenseMapPair : public std::pair<KeyT, ValueT> { +struct DenseMapPair : std::pair<KeyT, ValueT> { using std::pair<KeyT, ValueT>::pair; KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; } diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h index 4ced758..3c54f32 100644 --- a/llvm/include/llvm/ADT/DepthFirstIterator.h +++ b/llvm/include/llvm/ADT/DepthFirstIterator.h @@ -66,8 +66,8 @@ public: // one more method, completed, which is invoked when all children of a // node have been processed. It is intended to distinguish of back and // cross edges in the spanning tree but is not used in the common case. -template <typename NodeRef, unsigned SmallSize=8> -struct df_iterator_default_set : public SmallPtrSet<NodeRef, SmallSize> { +template <typename NodeRef, unsigned SmallSize = 8> +struct df_iterator_default_set : SmallPtrSet<NodeRef, SmallSize> { using BaseSet = SmallPtrSet<NodeRef, SmallSize>; using iterator = typename BaseSet::iterator; @@ -235,8 +235,10 @@ iterator_range<df_iterator<T>> depth_first(const T& G) { } // Provide global definitions of external depth first iterators... -template <class T, class SetTy = df_iterator_default_set<typename GraphTraits<T>::NodeRef>> -struct df_ext_iterator : public df_iterator<T, SetTy, true> { +template <class T, + class SetTy = + df_iterator_default_set<typename GraphTraits<T>::NodeRef>> +struct df_ext_iterator : df_iterator<T, SetTy, true> { df_ext_iterator(const df_iterator<T, SetTy, true> &V) : df_iterator<T, SetTy, true>(V) {} }; @@ -262,7 +264,7 @@ template <class T, class SetTy = df_iterator_default_set<typename GraphTraits<T>::NodeRef>, bool External = false> -struct idf_iterator : public df_iterator<Inverse<T>, SetTy, External> { +struct idf_iterator : df_iterator<Inverse<T>, SetTy, External> { idf_iterator(const df_iterator<Inverse<T>, SetTy, External> &V) : df_iterator<Inverse<T>, SetTy, External>(V) {} }; @@ -284,8 +286,10 @@ iterator_range<idf_iterator<T>> inverse_depth_first(const T& G) { } // Provide global definitions of external inverse depth first iterators... 
-template <class T, class SetTy = df_iterator_default_set<typename GraphTraits<T>::NodeRef>> -struct idf_ext_iterator : public idf_iterator<T, SetTy, true> { +template <class T, + class SetTy = + df_iterator_default_set<typename GraphTraits<T>::NodeRef>> +struct idf_ext_iterator : idf_iterator<T, SetTy, true> { idf_ext_iterator(const idf_iterator<T, SetTy, true> &V) : idf_iterator<T, SetTy, true>(V) {} idf_ext_iterator(const df_iterator<Inverse<T>, SetTy, true> &V) diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h index 310539f..8b2425e 100644 --- a/llvm/include/llvm/ADT/ImmutableSet.h +++ b/llvm/include/llvm/ADT/ImmutableSet.h @@ -931,8 +931,7 @@ struct ImutProfileInfo<T*> { /// ImutContainerInfo - Generic definition of comparison operations for /// elements of immutable containers that defaults to using /// std::equal_to<> and std::less<> to perform comparison of elements. -template <typename T> -struct ImutContainerInfo : public ImutProfileInfo<T> { +template <typename T> struct ImutContainerInfo : ImutProfileInfo<T> { using value_type = typename ImutProfileInfo<T>::value_type; using value_type_ref = typename ImutProfileInfo<T>::value_type_ref; using key_type = value_type; @@ -957,8 +956,7 @@ struct ImutContainerInfo : public ImutProfileInfo<T> { /// ImutContainerInfo - Specialization for pointer values to treat pointers /// as references to unique objects. Pointers are thus compared by /// their addresses. -template <typename T> -struct ImutContainerInfo<T*> : public ImutProfileInfo<T*> { +template <typename T> struct ImutContainerInfo<T *> : ImutProfileInfo<T *> { using value_type = typename ImutProfileInfo<T*>::value_type; using value_type_ref = typename ImutProfileInfo<T*>::value_type_ref; using key_type = value_type; diff --git a/llvm/include/llvm/ADT/PostOrderIterator.h b/llvm/include/llvm/ADT/PostOrderIterator.h index 1cbd3c1..d9aa452 100644 --- a/llvm/include/llvm/ADT/PostOrderIterator.h +++ b/llvm/include/llvm/ADT/PostOrderIterator.h @@ -200,7 +200,7 @@ template <class T> iterator_range<po_iterator<T>> post_order(const T &G) { // Provide global definitions of external postorder iterators... template <class T, class SetType = std::set<typename GraphTraits<T>::NodeRef>> -struct po_ext_iterator : public po_iterator<T, SetType, true> { +struct po_ext_iterator : po_iterator<T, SetType, true> { po_ext_iterator(const po_iterator<T, SetType, true> &V) : po_iterator<T, SetType, true>(V) {} }; @@ -223,7 +223,7 @@ iterator_range<po_ext_iterator<T, SetType>> post_order_ext(const T &G, SetType & // Provide global definitions of inverse post order iterators... template <class T, class SetType = std::set<typename GraphTraits<T>::NodeRef>, bool External = false> -struct ipo_iterator : public po_iterator<Inverse<T>, SetType, External> { +struct ipo_iterator : po_iterator<Inverse<T>, SetType, External> { ipo_iterator(const po_iterator<Inverse<T>, SetType, External> &V) : po_iterator<Inverse<T>, SetType, External> (V) {} }; @@ -245,7 +245,7 @@ iterator_range<ipo_iterator<T>> inverse_post_order(const T &G) { // Provide global definitions of external inverse postorder iterators... 
template <class T, class SetType = std::set<typename GraphTraits<T>::NodeRef>> -struct ipo_ext_iterator : public ipo_iterator<T, SetType, true> { +struct ipo_ext_iterator : ipo_iterator<T, SetType, true> { ipo_ext_iterator(const ipo_iterator<T, SetType, true> &V) : ipo_iterator<T, SetType, true>(V) {} ipo_ext_iterator(const po_iterator<Inverse<T>, SetType, true> &V) : diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 658f262..a9841c6 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -674,7 +674,7 @@ using zip_traits = iterator_facade_base< ReferenceTupleType *, ReferenceTupleType>; template <typename ZipType, typename ReferenceTupleType, typename... Iters> -struct zip_common : public zip_traits<ZipType, ReferenceTupleType, Iters...> { +struct zip_common : zip_traits<ZipType, ReferenceTupleType, Iters...> { using Base = zip_traits<ZipType, ReferenceTupleType, Iters...>; using IndexSequence = std::index_sequence_for<Iters...>; using value_type = typename Base::value_type; diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h index da9d3ab0..273a5cf 100644 --- a/llvm/include/llvm/ADT/STLForwardCompat.h +++ b/llvm/include/llvm/ADT/STLForwardCompat.h @@ -26,6 +26,54 @@ namespace llvm { // Features from C++20 //===----------------------------------------------------------------------===// +namespace numbers { +// clang-format off +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T e_v = T(0x1.5bf0a8b145769P+1); // (2.7182818284590452354) https://oeis.org/A001113 +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T egamma_v = T(0x1.2788cfc6fb619P-1); // (.57721566490153286061) https://oeis.org/A001620 +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T ln2_v = T(0x1.62e42fefa39efP-1); // (.69314718055994530942) https://oeis.org/A002162 +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T ln10_v = T(0x1.26bb1bbb55516P+1); // (2.3025850929940456840) https://oeis.org/A002392 +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T log2e_v = T(0x1.71547652b82feP+0); // (1.4426950408889634074) +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T log10e_v = T(0x1.bcb7b1526e50eP-2); // (.43429448190325182765) +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T pi_v = T(0x1.921fb54442d18P+1); // (3.1415926535897932385) https://oeis.org/A000796 +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T inv_pi_v = T(0x1.45f306dc9c883P-2); // (.31830988618379067154) https://oeis.org/A049541 +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T inv_sqrtpi_v = T(0x1.20dd750429b6dP-1); // (.56418958354775628695) https://oeis.org/A087197 +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T sqrt2_v = T(0x1.6a09e667f3bcdP+0); // (1.4142135623730950488) https://oeis.org/A002193 +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T inv_sqrt2_v = T(0x1.6a09e667f3bcdP-1); // (.70710678118654752440) +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline
constexpr T sqrt3_v = T(0x1.bb67ae8584caaP+0); // (1.7320508075688772935) https://oeis.org/A002194 +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T inv_sqrt3_v = T(0x1.279a74590331cP-1); // (.57735026918962576451) +template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T phi_v = T(0x1.9e3779b97f4a8P+0); // (1.6180339887498948482) https://oeis.org/A001622 + +inline constexpr double e = e_v<double>; +inline constexpr double egamma = egamma_v<double>; +inline constexpr double ln2 = ln2_v<double>; +inline constexpr double ln10 = ln10_v<double>; +inline constexpr double log2e = log2e_v<double>; +inline constexpr double log10e = log10e_v<double>; +inline constexpr double pi = pi_v<double>; +inline constexpr double inv_pi = inv_pi_v<double>; +inline constexpr double inv_sqrtpi = inv_sqrtpi_v<double>; +inline constexpr double sqrt2 = sqrt2_v<double>; +inline constexpr double inv_sqrt2 = inv_sqrt2_v<double>; +inline constexpr double sqrt3 = sqrt3_v<double>; +inline constexpr double inv_sqrt3 = inv_sqrt3_v<double>; +inline constexpr double phi = phi_v<double>; +// clang-format on +} // namespace numbers + template <typename T> struct remove_cvref // NOLINT(readability-identifier-naming) { diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h index f588a77..8e7c8b3 100644 --- a/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/llvm/include/llvm/ADT/SmallPtrSet.h @@ -532,18 +532,8 @@ class SmallPtrSet : public SmallPtrSetImpl<PtrType> { using BaseT = SmallPtrSetImpl<PtrType>; - // A constexpr version of llvm::bit_ceil. - // TODO: Replace this with std::bit_ceil once C++20 is available. - static constexpr size_t RoundUpToPowerOfTwo(size_t X) { - size_t C = 1; - size_t CMax = C << (std::numeric_limits<size_t>::digits - 1); - while (C < X && C < CMax) - C <<= 1; - return C; - } - // Make sure that SmallSize is a power of two, round up if not. - static constexpr size_t SmallSizePowTwo = RoundUpToPowerOfTwo(SmallSize); + static constexpr size_t SmallSizePowTwo = llvm::bit_ceil_constexpr(SmallSize); /// SmallStorage - Fixed size storage used in 'small mode'. const void *SmallStorage[SmallSizePowTwo]; diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h index 66c4f94..8b60b69 100644 --- a/llvm/include/llvm/ADT/bit.h +++ b/llvm/include/llvm/ADT/bit.h @@ -336,34 +336,44 @@ template <typename T> [[nodiscard]] T bit_ceil(T Value) { return T(1) << llvm::bit_width<T>(Value - 1u); } -// Forward-declare rotr so that rotl can use it. -template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>> -[[nodiscard]] constexpr T rotr(T V, int R); +/// Returns the smallest integral power of two no smaller than Value if Value is +/// nonzero. Returns 1 otherwise. +/// +/// Ex. bit_ceil(5) == 8. +/// +/// The return value is undefined if the input is larger than the largest power +/// of two representable in T. 
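/// Illustrative compile-time checks (a sketch, not part of the patch; both
/// values follow from the definition below):
///   static_assert(bit_ceil_constexpr(0u) == 1u); // inputs below 2 clamp to 1
///   static_assert(bit_ceil_constexpr(5u) == 8u); // next power of two
///   static_assert(bit_ceil_constexpr(8u) == 8u); // exact powers are kept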
+template <typename T> [[nodiscard]] constexpr T bit_ceil_constexpr(T Value) { + static_assert(std::is_unsigned_v<T>, + "Only unsigned integral types are allowed."); + if (Value < 2) + return 1; + return T(1) << llvm::bit_width_constexpr<T>(Value - 1u); +} template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>> [[nodiscard]] constexpr T rotl(T V, int R) { - unsigned N = std::numeric_limits<T>::digits; + constexpr unsigned N = std::numeric_limits<T>::digits; - R = R % N; - if (!R) - return V; + static_assert(has_single_bit(N), "& (N - 1) is only valid for powers of two"); + R = R & (N - 1); - if (R < 0) - return llvm::rotr(V, -R); + if (R == 0) + return V; return (V << R) | (V >> (N - R)); } -template <typename T, typename> [[nodiscard]] constexpr T rotr(T V, int R) { - unsigned N = std::numeric_limits<T>::digits; +template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>> +[[nodiscard]] constexpr T rotr(T V, int R) { + constexpr unsigned N = std::numeric_limits<T>::digits; + + static_assert(has_single_bit(N), "& (N - 1) is only valid for powers of two"); + R = R & (N - 1); - R = R % N; - if (!R) + if (R == 0) return V; - if (R < 0) - return llvm::rotl(V, -R); - return (V >> R) | (V << (N - R)); } diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index e5a6c8c..3d3ec14 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1345,6 +1345,7 @@ public: class LoopGuards { DenseMap<const SCEV *, const SCEV *> RewriteMap; + SmallDenseSet<std::pair<const SCEV *, const SCEV *>> NotEqual; bool PreserveNUW = false; bool PreserveNSW = false; ScalarEvolution &SE; diff --git a/llvm/include/llvm/Support/Alignment.h b/llvm/include/llvm/Support/Alignment.h index a4ca54e..f9d7c76 100644 --- a/llvm/include/llvm/Support/Alignment.h +++ b/llvm/include/llvm/Support/Alignment.h @@ -103,7 +103,7 @@ inline Align assumeAligned(uint64_t Value) { /// This struct is a compact representation of a valid (power of two) or /// undefined (0) alignment. -struct MaybeAlign : public std::optional<Align> { +struct MaybeAlign : std::optional<Align> { private: using UP = std::optional<Align>; diff --git a/llvm/include/llvm/Support/Casting.h b/llvm/include/llvm/Support/Casting.h index 2a9a149..6f6df2e 100644 --- a/llvm/include/llvm/Support/Casting.h +++ b/llvm/include/llvm/Support/Casting.h @@ -340,7 +340,7 @@ struct ValueFromPointerCast /// during the cast. It's also a good example of how to implement a move-only /// cast. template <typename To, typename From, typename Derived = void> -struct UniquePtrCast : public CastIsPossible<To, From *> { +struct UniquePtrCast : CastIsPossible<To, From *> { using Self = detail::SelfType<Derived, UniquePtrCast<To, From>>; using CastResultType = std::unique_ptr< std::remove_reference_t<typename cast_retty<To, From>::ret_type>>; @@ -473,7 +473,7 @@ struct ForwardToPointerCast { // take advantage of the cast traits whenever possible! template <typename To, typename From, typename Enable = void> -struct CastInfo : public CastIsPossible<To, From> { +struct CastInfo : CastIsPossible<To, From> { using Self = CastInfo<To, From, Enable>; using CastReturnType = typename cast_retty<To, From>::ret_type; @@ -536,8 +536,7 @@ struct CastInfo<To, std::unique_ptr<From>> : public UniquePtrCast<To, From> {}; /// the input is std::optional<From> that the output can be std::optional<To>. /// If that's not the case, specialize CastInfo for your use case. 
template <typename To, typename From> -struct CastInfo<To, std::optional<From>> : public OptionalValueCast<To, From> { -}; +struct CastInfo<To, std::optional<From>> : OptionalValueCast<To, From> {}; /// isa<X> - Return true if the parameter to the template is an instance of one /// of the template type arguments. Used like this: diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index dd05c53..5a5f00e 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -549,7 +549,7 @@ template <class DataType> struct OptionValue; // The default value safely does nothing. Option value printing is only // best-effort. template <class DataType, bool isClass> -struct OptionValueBase : public GenericOptionValue { +struct OptionValueBase : GenericOptionValue { // Temporary storage for argument passing. using WrapperType = OptionValue<DataType>; diff --git a/llvm/include/llvm/Support/DOTGraphTraits.h b/llvm/include/llvm/Support/DOTGraphTraits.h index ffa9abe..3b9fe00 100644 --- a/llvm/include/llvm/Support/DOTGraphTraits.h +++ b/llvm/include/llvm/Support/DOTGraphTraits.h @@ -162,9 +162,8 @@ public: /// graphs are converted to 'dot' graphs. When specializing, you may inherit /// from DefaultDOTGraphTraits if you don't need to override everything. /// -template <typename Ty> -struct DOTGraphTraits : public DefaultDOTGraphTraits { - DOTGraphTraits (bool simple=false) : DefaultDOTGraphTraits (simple) {} +template <typename Ty> struct DOTGraphTraits : DefaultDOTGraphTraits { + using DefaultDOTGraphTraits::DefaultDOTGraphTraits; }; } // End llvm namespace diff --git a/llvm/include/llvm/Support/ELFAttributes.h b/llvm/include/llvm/Support/ELFAttributes.h index 270246f..5771a84 100644 --- a/llvm/include/llvm/Support/ELFAttributes.h +++ b/llvm/include/llvm/Support/ELFAttributes.h @@ -48,8 +48,6 @@ struct SubsectionAndTagToTagName { StringRef SubsectionName; unsigned Tag; StringRef TagName; - SubsectionAndTagToTagName(StringRef SN, unsigned Tg, StringRef TN) - : SubsectionName(SN), Tag(Tg), TagName(TN) {} }; namespace ELFAttrs { diff --git a/llvm/include/llvm/Support/LSP/Protocol.h b/llvm/include/llvm/Support/LSP/Protocol.h index 93b82f1..e38203a 100644 --- a/llvm/include/llvm/Support/LSP/Protocol.h +++ b/llvm/include/llvm/Support/LSP/Protocol.h @@ -449,7 +449,7 @@ struct ReferenceContext { bool fromJSON(const llvm::json::Value &value, ReferenceContext &result, llvm::json::Path path); -struct ReferenceParams : public TextDocumentPositionParams { +struct ReferenceParams : TextDocumentPositionParams { ReferenceContext context; }; diff --git a/llvm/include/llvm/Support/MD5.h b/llvm/include/llvm/Support/MD5.h index ed29826..4ba3867 100644 --- a/llvm/include/llvm/Support/MD5.h +++ b/llvm/include/llvm/Support/MD5.h @@ -41,7 +41,7 @@ template <typename T> class ArrayRef; class MD5 { public: - struct MD5Result : public std::array<uint8_t, 16> { + struct MD5Result : std::array<uint8_t, 16> { LLVM_ABI SmallString<32> digest() const; uint64_t low() const { diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index c2716a9..41232335 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -13,6 +13,7 @@ #ifndef LLVM_SUPPORT_MATHEXTRAS_H #define LLVM_SUPPORT_MATHEXTRAS_H +#include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/bit.h" #include "llvm/Support/Compiler.h" #include <cassert> @@ -42,38 +43,28 @@ using common_sint = /// Mathematical constants. 
namespace numbers { -// TODO: Track C++20 std::numbers. // clang-format off -constexpr double e = 0x1.5bf0a8b145769P+1, // (2.7182818284590452354) https://oeis.org/A001113 - egamma = 0x1.2788cfc6fb619P-1, // (.57721566490153286061) https://oeis.org/A001620 - ln2 = 0x1.62e42fefa39efP-1, // (.69314718055994530942) https://oeis.org/A002162 - ln10 = 0x1.26bb1bbb55516P+1, // (2.3025850929940456840) https://oeis.org/A002392 - log2e = 0x1.71547652b82feP+0, // (1.4426950408889634074) - log10e = 0x1.bcb7b1526e50eP-2, // (.43429448190325182765) - pi = 0x1.921fb54442d18P+1, // (3.1415926535897932385) https://oeis.org/A000796 - inv_pi = 0x1.45f306dc9c883P-2, // (.31830988618379067154) https://oeis.org/A049541 - sqrtpi = 0x1.c5bf891b4ef6bP+0, // (1.7724538509055160273) https://oeis.org/A002161 - inv_sqrtpi = 0x1.20dd750429b6dP-1, // (.56418958354775628695) https://oeis.org/A087197 - sqrt2 = 0x1.6a09e667f3bcdP+0, // (1.4142135623730950488) https://oeis.org/A00219 - inv_sqrt2 = 0x1.6a09e667f3bcdP-1, // (.70710678118654752440) - sqrt3 = 0x1.bb67ae8584caaP+0, // (1.7320508075688772935) https://oeis.org/A002194 - inv_sqrt3 = 0x1.279a74590331cP-1, // (.57735026918962576451) - phi = 0x1.9e3779b97f4a8P+0; // (1.6180339887498948482) https://oeis.org/A001622 -constexpr float ef = 0x1.5bf0a8P+1F, // (2.71828183) https://oeis.org/A001113 - egammaf = 0x1.2788d0P-1F, // (.577215665) https://oeis.org/A001620 - ln2f = 0x1.62e430P-1F, // (.693147181) https://oeis.org/A002162 - ln10f = 0x1.26bb1cP+1F, // (2.30258509) https://oeis.org/A002392 - log2ef = 0x1.715476P+0F, // (1.44269504) - log10ef = 0x1.bcb7b2P-2F, // (.434294482) - pif = 0x1.921fb6P+1F, // (3.14159265) https://oeis.org/A000796 - inv_pif = 0x1.45f306P-2F, // (.318309886) https://oeis.org/A049541 - sqrtpif = 0x1.c5bf8aP+0F, // (1.77245385) https://oeis.org/A002161 - inv_sqrtpif = 0x1.20dd76P-1F, // (.564189584) https://oeis.org/A087197 - sqrt2f = 0x1.6a09e6P+0F, // (1.41421356) https://oeis.org/A002193 - inv_sqrt2f = 0x1.6a09e6P-1F, // (.707106781) - sqrt3f = 0x1.bb67aeP+0F, // (1.73205081) https://oeis.org/A002194 - inv_sqrt3f = 0x1.279a74P-1F, // (.577350269) - phif = 0x1.9e377aP+0F; // (1.61803399) https://oeis.org/A001622 +inline constexpr float ef = e_v<float>; +inline constexpr float egammaf = egamma_v<float>; +inline constexpr float ln2f = ln2_v<float>; +inline constexpr float ln10f = ln10_v<float>; +inline constexpr float log2ef = log2e_v<float>; +inline constexpr float log10ef = log10e_v<float>; +inline constexpr float pif = pi_v<float>; +inline constexpr float inv_pif = inv_pi_v<float>; +inline constexpr float inv_sqrtpif = inv_sqrtpi_v<float>; +inline constexpr float sqrt2f = sqrt2_v<float>; +inline constexpr float inv_sqrt2f = inv_sqrt2_v<float>; +inline constexpr float sqrt3f = sqrt3_v<float>; +inline constexpr float inv_sqrt3f = inv_sqrt3_v<float>; +inline constexpr float phif = phi_v<float>; + +// sqrtpi is not in C++20 std::numbers. 
+template <typename T, typename = std::enable_if_t<std::is_floating_point_v<T>>> +inline constexpr T sqrtpi_v = T(0x1.c5bf891b4ef6bP+0); // (1.7724538509055160273) https://oeis.org/A002161 +inline constexpr double sqrtpi = sqrtpi_v<double>; +inline constexpr float sqrtpif = sqrtpi_v<float>; + // These string literals are taken from below: // https://github.com/bminor/glibc/blob/8543577b04ded6d979ffcc5a818930e4d74d0645/math/math.h#L1215-L1229 constexpr const char *pis = "3.141592653589793238462643383279502884", diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h index 40709d4..a4ed712 100644 --- a/llvm/include/llvm/Support/Timer.h +++ b/llvm/include/llvm/Support/Timer.h @@ -167,7 +167,7 @@ public: /// you to declare a new timer, AND specify the region to time, all in one /// statement. All timers with the same name are merged. This is primarily /// used for debugging and for hunting performance problems. -struct NamedRegionTimer : public TimeRegion { +struct NamedRegionTimer : TimeRegion { LLVM_ABI explicit NamedRegionTimer(StringRef Name, StringRef Description, StringRef GroupName, StringRef GroupDescription, diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 0e5bc48..df75999 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -947,9 +947,8 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) { /*UseBlockValue*/ false)); } - ValueLatticeElement Result = TrueVal; - Result.mergeIn(FalseVal); - return Result; + TrueVal.mergeIn(FalseVal); + return TrueVal; } std::optional<ConstantRange> @@ -1778,9 +1777,8 @@ ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, assert(OptResult && "Value not available after solving"); } - ValueLatticeElement Result = *OptResult; - LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); - return Result; + LLVM_DEBUG(dbgs() << " Result = " << *OptResult << "\n"); + return *OptResult; } ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) { diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index e06b095..425420f 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15740,19 +15740,26 @@ void ScalarEvolution::LoopGuards::collectFromBlock( GetNextSCEVDividesByDivisor(One, DividesBy); To = SE.getUMaxExpr(FromRewritten, OneAlignedUp); } else { + // LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS), + // but creating the subtraction eagerly is expensive. Track the + // inequalities in a separate set, and materialize the rewrite lazily + // when encountering a suitable subtraction while rewriting.
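// Worked example (a sketch, not part of the patch): given a guard `a != b`,
// the pointer-ordered pair {a, b} is recorded in NotEqual instead of eagerly
// building `a - b`. If a later expression contains the subtraction a - b
// (e.g. inside a backedge-taken count), the rewriter looks the pair up and
// produces umax(a - b, 1), the same result the removed eager AddSubRewrite
// computed up front.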
if (LHS->getType()->isPointerTy()) { LHS = SE.getLosslessPtrToIntExpr(LHS); RHS = SE.getLosslessPtrToIntExpr(RHS); if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS)) break; } - auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) { - const SCEV *Sub = SE.getMinusSCEV(A, B); - AddRewrite(Sub, Sub, - SE.getUMaxExpr(Sub, SE.getOne(From->getType()))); - }; - AddSubRewrite(LHS, RHS); - AddSubRewrite(RHS, LHS); + const SCEVConstant *C; + const SCEV *A, *B; + if (match(RHS, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) && + match(LHS, m_scev_Add(m_scev_Specific(C), m_SCEV(B)))) { + RHS = A; + LHS = B; + } + if (LHS > RHS) + std::swap(LHS, RHS); + Guards.NotEqual.insert({LHS, RHS}); continue; } break; @@ -15886,13 +15893,15 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const { class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> { const DenseMap<const SCEV *, const SCEV *> ⤅ + const SmallDenseSet<std::pair<const SCEV *, const SCEV *>> ≠ SCEV::NoWrapFlags FlagMask = SCEV::FlagAnyWrap; public: SCEVLoopGuardRewriter(ScalarEvolution &SE, const ScalarEvolution::LoopGuards &Guards) - : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap) { + : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap), + NotEqual(Guards.NotEqual) { if (Guards.PreserveNUW) FlagMask = ScalarEvolution::setFlags(FlagMask, SCEV::FlagNUW); if (Guards.PreserveNSW) @@ -15947,14 +15956,36 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const { } const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + // Helper to check if S is a subtraction (A - B) where A != B, and if so, + // return UMax(S, 1). + auto RewriteSubtraction = [&](const SCEV *S) -> const SCEV * { + const SCEV *LHS, *RHS; + if (MatchBinarySub(S, LHS, RHS)) { + if (LHS > RHS) + std::swap(LHS, RHS); + if (NotEqual.contains({LHS, RHS})) + return SE.getUMaxExpr(S, SE.getOne(S->getType())); + } + return nullptr; + }; + + // Check if Expr itself is a subtraction pattern with guard info. + if (const SCEV *Rewritten = RewriteSubtraction(Expr)) + return Rewritten; + // Trip count expressions sometimes consist of adding 3 operands, i.e. // (Const + A + B). There may be guard info for A + B, and if so, apply // it. // TODO: Could more generally apply guards to Add sub-expressions. 
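// Illustrative three-operand case for the check below (a sketch, not from
// the patch): a backedge-taken count of the form (-1 + a + (-1 * b)) under a
// guard `a != b`. The constant -1 is peeled off, the remaining
// a + (-1 * b) is recognized as the subtraction a - b by RewriteSubtraction,
// and the whole expression becomes (-1 + umax(a - b, 1)).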
if (isa<SCEVConstant>(Expr->getOperand(0)) && Expr->getNumOperands() == 3) { - if (const SCEV *S = Map.lookup( - SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2)))) + const SCEV *Add = + SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2)); + if (const SCEV *Rewritten = RewriteSubtraction(Add)) + return SE.getAddExpr( + Expr->getOperand(0), Rewritten, + ScalarEvolution::maskFlags(Expr->getNoWrapFlags(), FlagMask)); + if (const SCEV *S = Map.lookup(Add)) return SE.getAddExpr(Expr->getOperand(0), S); } SmallVector<const SCEV *, 2> Operands; @@ -15989,7 +16020,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const { } }; - if (RewriteMap.empty()) + if (RewriteMap.empty() && NotEqual.empty()) return Expr; SCEVLoopGuardRewriter Rewriter(SE, *this); diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 8623c06..b4de79a 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -130,44 +130,46 @@ struct fltSemantics { bool hasSignBitInMSB = true; }; -static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16}; -static constexpr fltSemantics semBFloat = {127, -126, 8, 16}; -static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32}; -static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; -static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128}; -static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; -static constexpr fltSemantics semFloat8E5M2FNUZ = { +constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16}; +constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16}; +constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32}; +constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64}; +constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128}; +constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8}; +constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = { 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; -static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8}; -static constexpr fltSemantics semFloat8E4M3FN = { +constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8}; +constexpr fltSemantics APFloatBase::semFloat8E4M3FN = { 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes}; -static constexpr fltSemantics semFloat8E4M3FNUZ = { +constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = { 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; -static constexpr fltSemantics semFloat8E4M3B11FNUZ = { +constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = { 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; -static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8}; -static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; -static constexpr fltSemantics semFloat8E8M0FNU = {127, - -127, - 1, - 8, - fltNonfiniteBehavior::NanOnly, - fltNanEncoding::AllOnes, - false, - false, - false}; - -static constexpr fltSemantics semFloat6E3M2FN = { +constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8}; +constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19}; +constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = { + 127, + -127, + 1, + 8, + fltNonfiniteBehavior::NanOnly, + fltNanEncoding::AllOnes, + false, + false, + false}; + +constexpr fltSemantics APFloatBase::semFloat6E3M2FN = { 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly}; -static constexpr fltSemantics 
semFloat6E2M3FN = { +constexpr fltSemantics APFloatBase::semFloat6E2M3FN = { 2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly}; -static constexpr fltSemantics semFloat4E2M1FN = { +constexpr fltSemantics APFloatBase::semFloat4E2M1FN = { 2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly}; -static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; -static constexpr fltSemantics semBogus = {0, 0, 0, 0}; -static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; -static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, - 53 + 53, 128}; +constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64, + 80}; +constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0}; +constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128}; +constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = { + 1023, -1022 + 53, 53 + 53, 128}; const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { switch (S) { @@ -261,36 +263,6 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { llvm_unreachable("Unknown floating semantics"); } -const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } -const fltSemantics &APFloatBase::BFloat() { return semBFloat; } -const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } -const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } -const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } -const fltSemantics &APFloatBase::PPCDoubleDouble() { - return semPPCDoubleDouble; -} -const fltSemantics &APFloatBase::PPCDoubleDoubleLegacy() { - return semPPCDoubleDoubleLegacy; -} -const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } -const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } -const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; } -const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } -const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } -const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { - return semFloat8E4M3B11FNUZ; -} -const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; } -const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } -const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; } -const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } -const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } -const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; } -const fltSemantics &APFloatBase::x87DoubleExtended() { - return semX87DoubleExtended; -} -const fltSemantics &APFloatBase::Bogus() { return semBogus; } - bool APFloatBase::isRepresentableBy(const fltSemantics &A, const fltSemantics &B) { return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent && @@ -1029,7 +1001,7 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { // For x87 extended precision, we want to make a NaN, not a // pseudo-NaN. Maybe we should expose the ability to make // pseudo-NaNs? 
- if (semantics == &semX87DoubleExtended) + if (semantics == &APFloatBase::semX87DoubleExtended) APInt::tcSetBit(significand, QNaNBit + 1); } @@ -1054,7 +1026,7 @@ IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { category = rhs.category; sign = rhs.sign; - rhs.semantics = &semBogus; + rhs.semantics = &APFloatBase::semBogus; return *this; } @@ -1247,7 +1219,7 @@ IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { assign(rhs); } -IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { +IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) { *this = std::move(rhs); } @@ -2607,8 +2579,8 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, shift = toSemantics.precision - fromSemantics.precision; bool X86SpecialNan = false; - if (&fromSemantics == &semX87DoubleExtended && - &toSemantics != &semX87DoubleExtended && category == fcNaN && + if (&fromSemantics == &APFloatBase::semX87DoubleExtended && + &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN && (!(*significandParts() & 0x8000000000000000ULL) || !(*significandParts() & 0x4000000000000000ULL))) { // x86 has some unusual NaNs which cannot be represented in any other @@ -2694,7 +2666,7 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, // For x87 extended precision, we want to make a NaN, not a special NaN if // the input wasn't special either. - if (!X86SpecialNan && semantics == &semX87DoubleExtended) + if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended) APInt::tcSetBit(significandParts(), semantics->precision - 1); // Convert of sNaN creates qNaN and raises an exception (invalid op). @@ -3530,7 +3502,8 @@ hash_code hash_value(const IEEEFloat &Arg) { // the actual IEEE respresentations. We compensate for that here. APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); + assert(semantics == + (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended); assert(partCount()==2); uint64_t myexponent, mysignificand; @@ -3560,7 +3533,8 @@ APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { } APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); + assert(semantics == + (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy); assert(partCount()==2); uint64_t words[2]; @@ -3574,14 +3548,14 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const { // Declare fltSemantics before APFloat that uses it (and // saves pointer to it) to ensure correct destruction order. 
fltSemantics extendedSemantics = *semantics; - extendedSemantics.minExponent = semIEEEdouble.minExponent; + extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent; IEEEFloat extended(*this); fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); assert(fs == opOK && !losesInfo); (void)fs; IEEEFloat u(extended); - fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); + fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo); assert(fs == opOK || fs == opInexact); (void)fs; words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); @@ -3597,7 +3571,7 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const { IEEEFloat v(extended); v.subtract(u, rmNearestTiesToEven); - fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); + fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo); assert(fs == opOK && !losesInfo); (void)fs; words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); @@ -3611,8 +3585,9 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const { template <const fltSemantics &S> APInt IEEEFloat::convertIEEEFloatToAPInt() const { assert(semantics == &S); - const int bias = - (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1); + const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU) + ? -S.minExponent + : -(S.minExponent - 1); constexpr unsigned int trailing_significand_bits = S.precision - 1; constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; constexpr integerPart integer_bit = @@ -3677,87 +3652,87 @@ APInt IEEEFloat::convertIEEEFloatToAPInt() const { APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { assert(partCount() == 2); - return convertIEEEFloatToAPInt<semIEEEquad>(); + return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>(); } APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { assert(partCount()==1); - return convertIEEEFloatToAPInt<semIEEEdouble>(); + return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>(); } APInt IEEEFloat::convertFloatAPFloatToAPInt() const { assert(partCount()==1); - return convertIEEEFloatToAPInt<semIEEEsingle>(); + return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>(); } APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semBFloat>(); + return convertIEEEFloatToAPInt<APFloatBase::semBFloat>(); } APInt IEEEFloat::convertHalfAPFloatToAPInt() const { assert(partCount()==1); - return convertIEEEFloatToAPInt<semIEEEhalf>(); + return convertIEEEFloatToAPInt<APFloatBase::semIEEEhalf>(); } APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat8E5M2>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>(); } APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>(); } APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat8E4M3>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>(); } APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat8E4M3FN>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>(); } APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const { assert(partCount() ==
1); - return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>(); } APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(); } APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat8E3M4>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>(); } APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloatTF32>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>(); } APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat8E8M0FNU>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>(); } APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat6E3M2FN>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>(); } APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat6E2M3FN>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>(); } APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const { assert(partCount() == 1); - return convertIEEEFloatToAPInt<semFloat4E2M1FN>(); + return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>(); } // This function creates an APInt that is just a bit map of the floating @@ -3765,74 +3740,77 @@ APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const { // and treating the result as a normal integer is unlikely to be useful. 
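// Concrete illustration (a sketch, not from the patch): for IEEE single
// precision, APFloat(1.0f).bitcastToAPInt() yields a 32-bit APInt holding
// 0x3F800000, the raw IEEE-754 encoding of 1.0f (sign 0, biased exponent 127,
// trailing significand 0).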
APInt IEEEFloat::bitcastToAPInt() const { - if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf) return convertHalfAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semBFloat) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat) return convertBFloatAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle) return convertFloatAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble) return convertDoubleAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics*)&semIEEEquad) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad) return convertQuadrupleAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) + if (semantics == + (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy) return convertPPCDoubleDoubleLegacyAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2) return convertFloat8E5M2APFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ) return convertFloat8E5M2FNUZAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3) return convertFloat8E4M3APFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN) return convertFloat8E4M3FNAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ) return convertFloat8E4M3FNUZAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) + if (semantics == + (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ) return convertFloat8E4M3B11FNUZAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4) return convertFloat8E3M4APFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloatTF32) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32) return convertFloatTF32APFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU) return convertFloat8E8M0FNUAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN) return convertFloat6E3M2FNAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN) return convertFloat6E2M3FNAPFloatToAPInt(); - if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN) + if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN) return convertFloat4E2M1FNAPFloatToAPInt(); - assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && + assert(semantics == + (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended && "unknown 
format!"); return convertF80LongDoubleAPFloatToAPInt(); } float IEEEFloat::convertToFloat() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && + assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle && "Float semantics are not IEEEsingle"); APInt api = bitcastToAPInt(); return api.bitsToFloat(); } double IEEEFloat::convertToDouble() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && + assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble && "Float semantics are not IEEEdouble"); APInt api = bitcastToAPInt(); return api.bitsToDouble(); @@ -3840,7 +3818,7 @@ double IEEEFloat::convertToDouble() const { #ifdef HAS_IEE754_FLOAT128 float128 IEEEFloat::convertToQuad() const { - assert(semantics == (const llvm::fltSemantics *)&semIEEEquad && + assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad && "Float semantics are not IEEEquads"); APInt api = bitcastToAPInt(); return api.bitsToQuad(); @@ -3861,7 +3839,7 @@ void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { uint64_t mysignificand = i1; uint8_t myintegerbit = mysignificand >> 63; - initialize(&semX87DoubleExtended); + initialize(&APFloatBase::semX87DoubleExtended); assert(partCount()==2); sign = static_cast<unsigned int>(i2>>15); @@ -3893,14 +3871,16 @@ void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) { // Get the first double and convert to our format. initFromDoubleAPInt(APInt(64, i1)); - fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); + fs = convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven, + &losesInfo); assert(fs == opOK && !losesInfo); (void)fs; // Unless we have a special case, add in second double. if (isFiniteNonZero()) { - IEEEFloat v(semIEEEdouble, APInt(64, i2)); - fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); + IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2)); + fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven, + &losesInfo); assert(fs == opOK && !losesInfo); (void)fs; @@ -3918,7 +3898,7 @@ void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) { uint64_t val = api.getRawData()[0]; uint64_t myexponent = (val & exponent_mask); - initialize(&semFloat8E8M0FNU); + initialize(&APFloatBase::semFloat8E8M0FNU); assert(partCount() == 1); // This format has unsigned representation only @@ -4025,109 +4005,109 @@ void IEEEFloat::initFromIEEEAPInt(const APInt &api) { } void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { - initFromIEEEAPInt<semIEEEquad>(api); + initFromIEEEAPInt<APFloatBase::semIEEEquad>(api); } void IEEEFloat::initFromDoubleAPInt(const APInt &api) { - initFromIEEEAPInt<semIEEEdouble>(api); + initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api); } void IEEEFloat::initFromFloatAPInt(const APInt &api) { - initFromIEEEAPInt<semIEEEsingle>(api); + initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api); } void IEEEFloat::initFromBFloatAPInt(const APInt &api) { - initFromIEEEAPInt<semBFloat>(api); + initFromIEEEAPInt<APFloatBase::semBFloat>(api); } void IEEEFloat::initFromHalfAPInt(const APInt &api) { - initFromIEEEAPInt<semIEEEhalf>(api); + initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api); } void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { - initFromIEEEAPInt<semFloat8E5M2>(api); + initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api); } void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) { - initFromIEEEAPInt<semFloat8E5M2FNUZ>(api); + 
initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api); } void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) { - initFromIEEEAPInt<semFloat8E4M3>(api); + initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api); } void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { - initFromIEEEAPInt<semFloat8E4M3FN>(api); + initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api); } void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) { - initFromIEEEAPInt<semFloat8E4M3FNUZ>(api); + initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api); } void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) { - initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api); + initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api); } void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) { - initFromIEEEAPInt<semFloat8E3M4>(api); + initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api); } void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { - initFromIEEEAPInt<semFloatTF32>(api); + initFromIEEEAPInt<APFloatBase::semFloatTF32>(api); } void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) { - initFromIEEEAPInt<semFloat6E3M2FN>(api); + initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api); } void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) { - initFromIEEEAPInt<semFloat6E2M3FN>(api); + initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api); } void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) { - initFromIEEEAPInt<semFloat4E2M1FN>(api); + initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api); } /// Treat api as containing the bits of a floating point number. void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { assert(api.getBitWidth() == Sem->sizeInBits); - if (Sem == &semIEEEhalf) + if (Sem == &APFloatBase::semIEEEhalf) return initFromHalfAPInt(api); - if (Sem == &semBFloat) + if (Sem == &APFloatBase::semBFloat) return initFromBFloatAPInt(api); - if (Sem == &semIEEEsingle) + if (Sem == &APFloatBase::semIEEEsingle) return initFromFloatAPInt(api); - if (Sem == &semIEEEdouble) + if (Sem == &APFloatBase::semIEEEdouble) return initFromDoubleAPInt(api); - if (Sem == &semX87DoubleExtended) + if (Sem == &APFloatBase::semX87DoubleExtended) return initFromF80LongDoubleAPInt(api); - if (Sem == &semIEEEquad) + if (Sem == &APFloatBase::semIEEEquad) return initFromQuadrupleAPInt(api); - if (Sem == &semPPCDoubleDoubleLegacy) + if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy) return initFromPPCDoubleDoubleLegacyAPInt(api); - if (Sem == &semFloat8E5M2) + if (Sem == &APFloatBase::semFloat8E5M2) return initFromFloat8E5M2APInt(api); - if (Sem == &semFloat8E5M2FNUZ) + if (Sem == &APFloatBase::semFloat8E5M2FNUZ) return initFromFloat8E5M2FNUZAPInt(api); - if (Sem == &semFloat8E4M3) + if (Sem == &APFloatBase::semFloat8E4M3) return initFromFloat8E4M3APInt(api); - if (Sem == &semFloat8E4M3FN) + if (Sem == &APFloatBase::semFloat8E4M3FN) return initFromFloat8E4M3FNAPInt(api); - if (Sem == &semFloat8E4M3FNUZ) + if (Sem == &APFloatBase::semFloat8E4M3FNUZ) return initFromFloat8E4M3FNUZAPInt(api); - if (Sem == &semFloat8E4M3B11FNUZ) + if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ) return initFromFloat8E4M3B11FNUZAPInt(api); - if (Sem == &semFloat8E3M4) + if (Sem == &APFloatBase::semFloat8E3M4) return initFromFloat8E3M4APInt(api); - if (Sem == &semFloatTF32) + if (Sem == &APFloatBase::semFloatTF32) return initFromFloatTF32APInt(api); - if (Sem == &semFloat8E8M0FNU) + if (Sem == &APFloatBase::semFloat8E8M0FNU) return initFromFloat8E8M0FNUAPInt(api); - if (Sem == &semFloat6E3M2FN) + if (Sem == 
&APFloatBase::semFloat6E3M2FN) return initFromFloat6E3M2FNAPInt(api); - if (Sem == &semFloat6E2M3FN) + if (Sem == &APFloatBase::semFloat6E2M3FN) return initFromFloat6E2M3FNAPInt(api); - if (Sem == &semFloat4E2M1FN) + if (Sem == &APFloatBase::semFloat4E2M1FN) return initFromFloat4E2M1FNAPInt(api); llvm_unreachable("unsupported semantics"); @@ -4202,11 +4182,11 @@ IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { } IEEEFloat::IEEEFloat(float f) { - initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); + initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f)); } IEEEFloat::IEEEFloat(double d) { - initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); + initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d)); } namespace { @@ -4815,38 +4795,40 @@ IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) { DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) : Semantics(&S), - Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { - assert(Semantics == &semPPCDoubleDouble); + Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble), + APFloat(APFloatBase::semIEEEdouble)}) { + assert(Semantics == &APFloatBase::semPPCDoubleDouble); } DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) - : Semantics(&S), - Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), - APFloat(semIEEEdouble, uninitialized)}) { - assert(Semantics == &semPPCDoubleDouble); + : Semantics(&S), Floats(new APFloat[2]{ + APFloat(APFloatBase::semIEEEdouble, uninitialized), + APFloat(APFloatBase::semIEEEdouble, uninitialized)}) { + assert(Semantics == &APFloatBase::semPPCDoubleDouble); } DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) - : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), - APFloat(semIEEEdouble)}) { - assert(Semantics == &semPPCDoubleDouble); + : Semantics(&S), + Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I), + APFloat(APFloatBase::semIEEEdouble)}) { + assert(Semantics == &APFloatBase::semPPCDoubleDouble); } DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) : Semantics(&S), Floats(new APFloat[2]{ - APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), - APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { - assert(Semantics == &semPPCDoubleDouble); + APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])), + APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) { + assert(Semantics == &APFloatBase::semPPCDoubleDouble); } DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, APFloat &&Second) : Semantics(&S), Floats(new APFloat[2]{std::move(First), std::move(Second)}) { - assert(Semantics == &semPPCDoubleDouble); - assert(&Floats[0].getSemantics() == &semIEEEdouble); - assert(&Floats[1].getSemantics() == &semIEEEdouble); + assert(Semantics == &APFloatBase::semPPCDoubleDouble); + assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble); + assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble); } DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) @@ -4854,14 +4836,14 @@ DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) Floats(RHS.Floats ? 
new APFloat[2]{APFloat(RHS.Floats[0]), APFloat(RHS.Floats[1])} : nullptr) { - assert(Semantics == &semPPCDoubleDouble); + assert(Semantics == &APFloatBase::semPPCDoubleDouble); } DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) : Semantics(RHS.Semantics), Floats(RHS.Floats) { - RHS.Semantics = &semBogus; + RHS.Semantics = &APFloatBase::semBogus; RHS.Floats = nullptr; - assert(Semantics == &semPPCDoubleDouble); + assert(Semantics == &APFloatBase::semPPCDoubleDouble); } DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { @@ -5009,12 +4991,12 @@ APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), CC(RHS.Floats[1]); - assert(&A.getSemantics() == &semIEEEdouble); - assert(&AA.getSemantics() == &semIEEEdouble); - assert(&C.getSemantics() == &semIEEEdouble); - assert(&CC.getSemantics() == &semIEEEdouble); - assert(&Out.Floats[0].getSemantics() == &semIEEEdouble); - assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); + assert(&A.getSemantics() == &APFloatBase::semIEEEdouble); + assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble); + assert(&C.getSemantics() == &APFloatBase::semIEEEdouble); + assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble); + assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble); + assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble); return Out.addImpl(A, AA, C, CC, RM); } @@ -5119,28 +5101,32 @@ APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, APFloat::roundingMode RM) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); - auto Ret = - Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); - *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); + APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt()); + auto Ret = Tmp.divide( + APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); + *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt()); return Ret; } APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); - auto Ret = - Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); - *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); + APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt()); + auto Ret = Tmp.remainder( + APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); + *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt()); return Ret; } APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); - auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); - *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); + APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt()); + auto Ret = Tmp.mod( + APFloat(APFloatBase::semPPCDoubleDoubleLegacy, 
RHS.bitcastToAPInt())); + *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt()); return Ret; } @@ -5148,17 +5134,21 @@ APFloat::opStatus DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, APFloat::roundingMode RM) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); + APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt()); auto Ret = Tmp.fusedMultiplyAdd( - APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), - APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); - *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); + APFloat(APFloatBase::semPPCDoubleDoubleLegacy, + Multiplicand.bitcastToAPInt()), + APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), + RM); + *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt()); return Ret; } APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); const APFloat &Hi = getFirst(); const APFloat &Lo = getSecond(); @@ -5309,22 +5299,28 @@ void DoubleAPFloat::makeZero(bool Neg) { } void DoubleAPFloat::makeLargest(bool Neg) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); - Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); + Floats[0] = + APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); + Floats[1] = + APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); if (Neg) changeSign(); } void DoubleAPFloat::makeSmallest(bool Neg) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); Floats[0].makeSmallest(Neg); Floats[1].makeZero(/* Neg = */ false); } void DoubleAPFloat::makeSmallestNormalized(bool Neg) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); + Floats[0] = + APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull)); if (Neg) Floats[0].changeSign(); Floats[1].makeZero(/* Neg = */ false); @@ -5355,7 +5351,8 @@ hash_code hash_value(const DoubleAPFloat &Arg) { } APInt DoubleAPFloat::bitcastToAPInt() const { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); uint64_t Data[] = { Floats[0].bitcastToAPInt().getRawData()[0], Floats[1].bitcastToAPInt().getRawData()[0], @@ -5365,10 +5362,11 @@ APInt DoubleAPFloat::bitcastToAPInt() const { Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, roundingMode RM) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - APFloat Tmp(semPPCDoubleDoubleLegacy); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); + APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy); auto Ret = Tmp.convertFromString(S, RM); - *this = 
DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); + *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt()); return Ret; } @@ -5379,7 +5377,8 @@ Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, // nextUp must choose the smallest output > input that follows these rules. // nextDown must choose the largest output < input that follows these rules. APFloat::opStatus DoubleAPFloat::next(bool nextDown) { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); // nextDown(x) = -nextUp(-x) if (nextDown) { changeSign(); @@ -5481,7 +5480,8 @@ APFloat::opStatus DoubleAPFloat::next(bool nextDown) { APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger( MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); // If Hi is not finite, or Lo is zero, the value is entirely represented // by Hi. Delegate to the simpler single-APFloat conversion. @@ -5761,8 +5761,9 @@ unsigned int DoubleAPFloat::convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); + return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt()) .convertToHexString(DST, HexDigits, UpperCase, RM); } @@ -5799,7 +5800,8 @@ bool DoubleAPFloat::isLargest() const { } bool DoubleAPFloat::isInteger() const { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); return Floats[0].isInteger() && Floats[1].isInteger(); } @@ -5807,8 +5809,9 @@ void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero) const { - assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) + assert(Semantics == &APFloatBase::semPPCDoubleDouble && + "Unexpected Semantics"); + APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt()) .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); } @@ -5840,14 +5843,17 @@ int ilogb(const DoubleAPFloat &Arg) { DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp, APFloat::roundingMode RM) { - assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), + assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() && + "Unexpected Semantics"); + return DoubleAPFloat(APFloatBase::PPCDoubleDouble(), + scalbn(Arg.Floats[0], Exp, RM), scalbn(Arg.Floats[1], Exp, RM)); } DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, APFloat::roundingMode RM) { - assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); + assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() && + "Unexpected Semantics"); // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in // [1.0, 2.0).
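Nearly every DoubleAPFloat method in the hunks above follows the same delegation shape: bitcast the two-double ppc_fp128 value to an APInt, reinterpret those bits under the legacy single-APFloat 128-bit semantics, run the scalar operation there, and bitcast the result back. A minimal sketch of that round trip using only the public APFloat API; modViaLegacy is a hypothetical name, mirroring DoubleAPFloat::mod above:

#include "llvm/ADT/APFloat.h"
using namespace llvm;

// Hypothetical helper (not part of the patch): perform mod on a
// PPCDoubleDouble value by round-tripping through the legacy semantics,
// the same shape DoubleAPFloat::mod uses internally.
static APFloat modViaLegacy(const APFloat &LHS, const APFloat &RHS) {
  APFloat Tmp(APFloatBase::PPCDoubleDoubleLegacy(), LHS.bitcastToAPInt());
  APFloat R(APFloatBase::PPCDoubleDoubleLegacy(), RHS.bitcastToAPInt());
  (void)Tmp.mod(R); // opStatus handling elided; the real method returns it
  return APFloat(APFloatBase::PPCDoubleDouble(), Tmp.bitcastToAPInt());
}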
@@ -5943,7 +5949,8 @@ DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, } APFloat First = scalbn(Hi, -Exp, RM); - return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); + return DoubleAPFloat(APFloatBase::PPCDoubleDouble(), std::move(First), + std::move(Second)); } } // namespace detail @@ -5955,9 +5962,8 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { } if (usesLayout<DoubleAPFloat>(Semantics)) { const fltSemantics& S = F.getSemantics(); - new (&Double) - DoubleAPFloat(Semantics, APFloat(std::move(F), S), - APFloat(semIEEEdouble)); + new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S), + APFloat(APFloatBase::IEEEdouble())); return; } llvm_unreachable("Unexpected semantics"); @@ -6065,8 +6071,9 @@ APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, return U.IEEE.convert(ToSemantics, RM, losesInfo); if (usesLayout<IEEEFloat>(getSemantics()) && usesLayout<DoubleAPFloat>(ToSemantics)) { - assert(&ToSemantics == &semPPCDoubleDouble); - auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); + assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble); + auto Ret = + U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo); *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); return Ret; } @@ -6113,13 +6120,15 @@ APFloat::opStatus APFloat::convertToInteger(APSInt &result, } double APFloat::convertToDouble() const { - if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) + if (&getSemantics() == + (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble) return getIEEE().convertToDouble(); assert(isRepresentableBy(getSemantics(), semIEEEdouble) && "Float semantics is not representable by IEEEdouble"); APFloat Temp = *this; bool LosesInfo; - opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); + opStatus St = + Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo); assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); (void)St; return Temp.getIEEE().convertToDouble(); @@ -6127,13 +6136,14 @@ double APFloat::convertToDouble() const { #ifdef HAS_IEE754_FLOAT128 float128 APFloat::convertToQuad() const { - if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) + if (&getSemantics() == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad) return getIEEE().convertToQuad(); assert(isRepresentableBy(getSemantics(), semIEEEquad) && "Float semantics is not representable by IEEEquad"); APFloat Temp = *this; bool LosesInfo; - opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); + opStatus St = + Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo); assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); (void)St; return Temp.getIEEE().convertToQuad(); @@ -6141,18 +6151,84 @@ float128 APFloat::convertToQuad() const { #endif float APFloat::convertToFloat() const { - if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) + if (&getSemantics() == + (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle) return getIEEE().convertToFloat(); assert(isRepresentableBy(getSemantics(), semIEEEsingle) && "Float semantics is not representable by IEEEsingle"); APFloat Temp = *this; bool LosesInfo; - opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); + opStatus St = + Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo); assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); (void)St; return 
Temp.getIEEE().convertToFloat(); } +APFloat::Storage::~Storage() { + if (usesLayout<IEEEFloat>(*semantics)) { + IEEE.~IEEEFloat(); + return; + } + if (usesLayout<DoubleAPFloat>(*semantics)) { + Double.~DoubleAPFloat(); + return; + } + llvm_unreachable("Unexpected semantics"); +} + +APFloat::Storage::Storage(const APFloat::Storage &RHS) { + if (usesLayout<IEEEFloat>(*RHS.semantics)) { + new (this) IEEEFloat(RHS.IEEE); + return; + } + if (usesLayout<DoubleAPFloat>(*RHS.semantics)) { + new (this) DoubleAPFloat(RHS.Double); + return; + } + llvm_unreachable("Unexpected semantics"); +} + +APFloat::Storage::Storage(APFloat::Storage &&RHS) { + if (usesLayout<IEEEFloat>(*RHS.semantics)) { + new (this) IEEEFloat(std::move(RHS.IEEE)); + return; + } + if (usesLayout<DoubleAPFloat>(*RHS.semantics)) { + new (this) DoubleAPFloat(std::move(RHS.Double)); + return; + } + llvm_unreachable("Unexpected semantics"); +} + +APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) { + if (usesLayout<IEEEFloat>(*semantics) && + usesLayout<IEEEFloat>(*RHS.semantics)) { + IEEE = RHS.IEEE; + } else if (usesLayout<DoubleAPFloat>(*semantics) && + usesLayout<DoubleAPFloat>(*RHS.semantics)) { + Double = RHS.Double; + } else if (this != &RHS) { + this->~Storage(); + new (this) Storage(RHS); + } + return *this; +} + +APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) { + if (usesLayout<IEEEFloat>(*semantics) && + usesLayout<IEEEFloat>(*RHS.semantics)) { + IEEE = std::move(RHS.IEEE); + } else if (usesLayout<DoubleAPFloat>(*semantics) && + usesLayout<DoubleAPFloat>(*RHS.semantics)) { + Double = std::move(RHS.Double); + } else if (this != &RHS) { + this->~Storage(); + new (this) Storage(std::move(RHS)); + } + return *this; +} + } // namespace llvm #undef APFLOAT_DISPATCH_ON_SEMANTICS diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 979a8b0..4b22c68 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -21,6 +21,7 @@ #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/RegisterPressure.h" #include <algorithm> +#include <array> namespace llvm { @@ -45,7 +46,7 @@ struct GCNRegPressure { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR]; } - void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); } + void clear() { Value.fill(0); } unsigned getNumRegs(RegKind Kind) const { assert(Kind < TOTAL_KINDS); @@ -127,9 +128,7 @@ struct GCNRegPressure { bool less(const MachineFunction &MF, const GCNRegPressure &O, unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const; - bool operator==(const GCNRegPressure &O) const { - return std::equal(&Value[0], &Value[ValueArraySize], O.Value); - } + bool operator==(const GCNRegPressure &O) const { return Value == O.Value; } bool operator!=(const GCNRegPressure &O) const { return !(*this == O); @@ -160,7 +159,7 @@ private: /// Pressure for all register kinds (first all regular register kinds, then /// all tuple register kinds).
- unsigned Value[ValueArraySize]; + std::array<unsigned, ValueArraySize> Value; static unsigned getRegKind(const TargetRegisterClass *RC, const SIRegisterInfo *STI); diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 2aa54c9..09ef6ac 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -45,6 +45,9 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom); + // 32-bit ABS is legal for AMDGPU except for R600 + setOperationAction(ISD::ABS, MVT::i32, Expand); + // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address // spaces, so it is custom lowered to handle those where it isn't. for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 1b7cb9b..636e31c 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -699,7 +699,8 @@ public: "Can't encode VTYPE for uninitialized or unknown"); if (TWiden != 0) return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt); - return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); + return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, + AltFmt); } bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index ddb53a2..12f776b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3775,11 +3775,13 @@ std::string RISCVInstrInfo::createMIROperandComment( #define CASE_VFMA_OPCODE_VV(OP) \ CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \ + case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VV, E16): \ case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \ case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64) #define CASE_VFMA_SPLATS(OP) \ CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \ + case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VFPR16, E16): \ case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \ case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64) // clang-format on @@ -4003,11 +4005,13 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, #define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VV, E16) \ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64) #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VFPR16, E16) \ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64) // clang-format on @@ -4469,6 +4473,20 @@ bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const { CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \ + +#define CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP) \ + CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \ + case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \ + case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \ + case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \ + case 
CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16) + +#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP) \ + CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \ + CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \ + CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \ + CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \ + CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) // clang-format on MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, @@ -4478,6 +4496,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, switch (MI.getOpcode()) { default: return nullptr; + case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWADD_ALT_WV): + case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWSUB_ALT_WV): case CASE_FP_WIDEOP_OPCODE_LMULS(FWADD_WV): case CASE_FP_WIDEOP_OPCODE_LMULS(FWSUB_WV): { assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && @@ -4494,6 +4514,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, llvm_unreachable("Unexpected opcode"); CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV) CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV) + CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWADD_ALT_WV) + CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWSUB_ALT_WV) } // clang-format on diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td index c9c1246..9358486 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td @@ -44,6 +44,336 @@ let Predicates = [HasStdExtZvfbfmin] in { let mayRaiseFPException = true, Predicates = [HasStdExtZvfbfwma] in defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM; +defset list<VTypeInfoToWide> AllWidenableIntToBF16Vectors = { + def : VTypeInfoToWide<VI8MF8, VBF16MF4>; + def : VTypeInfoToWide<VI8MF4, VBF16MF2>; + def : VTypeInfoToWide<VI8MF2, VBF16M1>; + def : VTypeInfoToWide<VI8M1, VBF16M2>; + def : VTypeInfoToWide<VI8M2, VBF16M4>; + def : VTypeInfoToWide<VI8M4, VBF16M8>; +} + +multiclass VPseudoVALU_VV_VF_RM_BF16 { + foreach m = MxListF in { + defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>, + SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX, + 16/*sew*/, forcePassthruRead=true>; + } + + defvar f = SCALAR_F16; + foreach m = f.MxList in { + defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>, + SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX, + f.SEW, forcePassthruRead=true>; + } +} + +multiclass VPseudoVALU_VF_RM_BF16 { + defvar f = SCALAR_F16; + foreach m = f.MxList in { + defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>, + SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX, + f.SEW, forcePassthruRead=true>; + } +} + +multiclass VPseudoVFWALU_VV_VF_RM_BF16 { + foreach m = MxListFW in { + defm "" : VPseudoBinaryW_VV_RM<m, sew=16>, + SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX, + 16/*sew*/, forcePassthruRead=true>; + } + + defvar f = SCALAR_F16; + foreach m = f.MxListFW in { + defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>, + SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX, + f.SEW, forcePassthruRead=true>; + } +} + +multiclass VPseudoVFWALU_WV_WF_RM_BF16 { + foreach m = MxListFW in { + defm "" : VPseudoBinaryW_WV_RM<m, sew=16>, + SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX, + 16/*sew*/, forcePassthruRead=true>; + } + defvar f = SCALAR_F16; + foreach m = f.MxListFW in { + defm "" : VPseudoBinaryW_WF_RM<m, f, sew=f.SEW>, + SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX, + f.SEW, forcePassthruRead=true>; + } +} + +multiclass VPseudoVFMUL_VV_VF_RM_BF16 { + foreach m = MxListF in 
{ + defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>, + SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX, + 16/*sew*/, forcePassthruRead=true>; + } + + defvar f = SCALAR_F16; + foreach m = f.MxList in { + defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>, + SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX, + f.SEW, forcePassthruRead=true>; + } +} + +multiclass VPseudoVWMUL_VV_VF_RM_BF16 { + foreach m = MxListFW in { + defm "" : VPseudoBinaryW_VV_RM<m, sew=16>, + SchedBinary<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV", m.MX, + 16/*sew*/, forcePassthruRead=true>; + } + + defvar f = SCALAR_F16; + foreach m = f.MxListFW in { + defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>, + SchedBinary<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF", m.MX, + f.SEW, forcePassthruRead=true>; + } +} + +multiclass VPseudoVMAC_VV_VF_AAXA_RM_BF16 { + foreach m = MxListF in { + defm "" : VPseudoTernaryV_VV_AAXA_RM<m, 16/*sew*/>, + SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV", + "ReadVFMulAddV", m.MX, 16/*sew*/>; + } + + defvar f = SCALAR_F16; + foreach m = f.MxList in { + defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, f.SEW>, + SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF", + "ReadVFMulAddV", m.MX, f.SEW>; + } +} + +multiclass VPseudoVWMAC_VV_VF_RM_BF16 { + foreach m = MxListFW in { + defm "" : VPseudoTernaryW_VV_RM<m, sew=16>, + SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV", + "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX, 16/*sew*/>; + } + + defvar f = SCALAR_F16; + foreach m = f.MxListFW in { + defm "" : VPseudoTernaryW_VF_RM<m, f, sew=f.SEW>, + SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV", + "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX, f.SEW>; + } +} + +multiclass VPseudoVRCP_V_BF16 { + foreach m = MxListF in { + defvar mx = m.MX; + let VLMul = m.value in { + def "_V_" # mx # "_E16" + : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, + SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/, + forcePassthruRead=true>; + def "_V_" # mx # "_E16_MASK" + : VPseudoUnaryMask<m.vrclass, m.vrclass>, + RISCVMaskedPseudo<MaskIdx = 2>, + SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/, + forcePassthruRead=true>; + } + } +} + +multiclass VPseudoVRCP_V_RM_BF16 { + foreach m = MxListF in { + defvar mx = m.MX; + let VLMul = m.value in { + def "_V_" # mx # "_E16" + : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>, + SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/, + forcePassthruRead=true>; + def "_V_" # mx # "_E16_MASK" + : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>, + RISCVMaskedPseudo<MaskIdx = 2>, + SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/, + forcePassthruRead=true>; + } + } +} + +multiclass VPseudoVMAX_VV_VF_BF16 { + foreach m = MxListF in { + defm "" : VPseudoBinaryV_VV<m, sew=16>, + SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV", + m.MX, 16/*sew*/, forcePassthruRead=true>; + } + + defvar f = SCALAR_F16; + foreach m = f.MxList in { + defm "" : VPseudoBinaryV_VF<m, f, f.SEW>, + SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF", + m.MX, f.SEW, forcePassthruRead=true>; + } +} + +multiclass VPseudoVSGNJ_VV_VF_BF16 { + foreach m = MxListF in { + defm "" : VPseudoBinaryV_VV<m, sew=16>, + SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX, + 16/*sew*/, forcePassthruRead=true>; + } + + defvar f = SCALAR_F16; + foreach m = f.MxList in { + defm "" : VPseudoBinaryV_VF<m, f, f.SEW>, + SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX, + f.SEW, forcePassthruRead=true>; + } +} + +multiclass 
VPseudoVWCVTF_V_BF16 { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxListW in + defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=8, + TargetConstraintType=3>, + SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, 8/*sew*/, + forcePassthruRead=true>; +} + +multiclass VPseudoVWCVTD_V_BF16 { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxListFW in + defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=16, + TargetConstraintType=3>, + SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, 16/*sew*/, + forcePassthruRead=true>; +} + +multiclass VPseudoVNCVTD_W_BF16 { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxListFW in + defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, sew=16, + TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/, + forcePassthruRead=true>; +} + +multiclass VPseudoVNCVTD_W_RM_BF16 { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxListFW in + defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, + constraint, sew=16, + TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/, + forcePassthruRead=true>; +} + +let Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT in { +let mayRaiseFPException = true in { +defm PseudoVFADD_ALT : VPseudoVALU_VV_VF_RM_BF16; +defm PseudoVFSUB_ALT : VPseudoVALU_VV_VF_RM_BF16; +defm PseudoVFRSUB_ALT : VPseudoVALU_VF_RM_BF16; +} + +let mayRaiseFPException = true in { +defm PseudoVFWADD_ALT : VPseudoVFWALU_VV_VF_RM_BF16; +defm PseudoVFWSUB_ALT : VPseudoVFWALU_VV_VF_RM_BF16; +defm PseudoVFWADD_ALT : VPseudoVFWALU_WV_WF_RM_BF16; +defm PseudoVFWSUB_ALT : VPseudoVFWALU_WV_WF_RM_BF16; +} + +let mayRaiseFPException = true in +defm PseudoVFMUL_ALT : VPseudoVFMUL_VV_VF_RM_BF16; + +let mayRaiseFPException = true in +defm PseudoVFWMUL_ALT : VPseudoVWMUL_VV_VF_RM_BF16; + +let mayRaiseFPException = true in { +defm PseudoVFMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16; +defm PseudoVFNMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16; +defm PseudoVFMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16; +defm PseudoVFNMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16; +defm PseudoVFMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16; +defm PseudoVFNMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16; +defm PseudoVFMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16; +defm PseudoVFNMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16; +} + +let mayRaiseFPException = true in { +defm PseudoVFWMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16; +defm PseudoVFWNMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16; +defm PseudoVFWMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16; +defm PseudoVFWNMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16; +} + +let mayRaiseFPException = true in +defm PseudoVFRSQRT7_ALT : VPseudoVRCP_V_BF16; + +let mayRaiseFPException = true in +defm PseudoVFREC7_ALT : VPseudoVRCP_V_RM_BF16; + +let mayRaiseFPException = true in { +defm PseudoVFMIN_ALT : VPseudoVMAX_VV_VF_BF16; +defm PseudoVFMAX_ALT : VPseudoVMAX_VV_VF_BF16; +} + +defm PseudoVFSGNJ_ALT : VPseudoVSGNJ_VV_VF_BF16; +defm PseudoVFSGNJN_ALT : VPseudoVSGNJ_VV_VF_BF16; +defm PseudoVFSGNJX_ALT : VPseudoVSGNJ_VV_VF_BF16; + +let mayRaiseFPException = true in { +defm PseudoVMFEQ_ALT : VPseudoVCMPM_VV_VF; +defm PseudoVMFNE_ALT : VPseudoVCMPM_VV_VF; +defm PseudoVMFLT_ALT : VPseudoVCMPM_VV_VF; +defm PseudoVMFLE_ALT : VPseudoVCMPM_VV_VF; +defm PseudoVMFGT_ALT : VPseudoVCMPM_VF; +defm PseudoVMFGE_ALT : VPseudoVCMPM_VF; +} + +defm PseudoVFCLASS_ALT : VPseudoVCLS_V; + +defm PseudoVFMERGE_ALT : VPseudoVMRG_FM; + +defm 
PseudoVFMV_V_ALT : VPseudoVMV_F; + +let mayRaiseFPException = true in { +defm PseudoVFWCVT_F_XU_ALT : VPseudoVWCVTF_V_BF16; +defm PseudoVFWCVT_F_X_ALT : VPseudoVWCVTF_V_BF16; + +defm PseudoVFWCVT_F_F_ALT : VPseudoVWCVTD_V_BF16; +} // mayRaiseFPException = true + +let mayRaiseFPException = true in { +let hasSideEffects = 0, hasPostISelHook = 1 in { +defm PseudoVFNCVT_XU_F_ALT : VPseudoVNCVTI_W_RM; +defm PseudoVFNCVT_X_F_ALT : VPseudoVNCVTI_W_RM; +} + +defm PseudoVFNCVT_RTZ_XU_F_ALT : VPseudoVNCVTI_W; +defm PseudoVFNCVT_RTZ_X_F_ALT : VPseudoVNCVTI_W; + +defm PseudoVFNCVT_F_F_ALT : VPseudoVNCVTD_W_RM_BF16; + +defm PseudoVFNCVT_ROD_F_F_ALT : VPseudoVNCVTD_W_BF16; +} // mayRaiseFPException = true + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + defvar f = SCALAR_F16; + let HasSEWOp = 1, BaseInstr = VFMV_F_S in + def "PseudoVFMV_" # f.FX # "_S_ALT" : + RISCVVPseudo<(outs f.fprclass:$rd), (ins VR:$rs2, sew:$sew)>, + Sched<[WriteVMovFS, ReadVMovFS]>; + let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1, + Constraints = "$rd = $passthru" in + def "PseudoVFMV_S_" # f.FX # "_ALT" : + RISCVVPseudo<(outs VR:$rd), + (ins VR:$passthru, f.fprclass:$rs1, AVL:$vl, sew:$sew)>, + Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>; +} + +defm PseudoVFSLIDE1UP_ALT : VPseudoVSLD1_VF<"@earlyclobber $rd">; +defm PseudoVFSLIDE1DOWN_ALT : VPseudoVSLD1_VF; +} // Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT + //===----------------------------------------------------------------------===// // Patterns //===----------------------------------------------------------------------===// @@ -108,6 +438,130 @@ let Predicates = [HasStdExtZvfbfmin] in { FRM_DYN, fvti.AVL, fvti.Log2SEW, TA_MA)>; } + + defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllBF16Vectors>; + defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER", + AllBF16Vectors, uimm5>; + defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16", + eew=16, vtilist=AllBF16Vectors>; + defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllBF16Vectors, uimm5>; + defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllBF16Vectors, uimm5>; + + foreach fvti = AllBF16Vectors in { + defm : VPatBinaryCarryInTAIL<"int_riscv_vmerge", "PseudoVMERGE", "VVM", + fvti.Vector, + fvti.Vector, fvti.Vector, fvti.Mask, + fvti.Log2SEW, fvti.LMul, fvti.RegClass, + fvti.RegClass, fvti.RegClass>; + defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE", + "V"#fvti.ScalarSuffix#"M", + fvti.Vector, + fvti.Vector, fvti.Scalar, fvti.Mask, + fvti.Log2SEW, fvti.LMul, fvti.RegClass, + fvti.RegClass, fvti.ScalarRegClass>; + defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX); + def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$passthru), + (fvti.Vector fvti.RegClass:$rs2), + (fvti.Scalar (fpimm0)), + (fvti.Mask VMV0:$vm), VLOpFrag)), + (instr fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; + + defvar ivti = GetIntVTypeInfo<fvti>.Vti; + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1, + fvti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX) + (fvti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm), + fvti.AVL, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), + (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))), + fvti.RegClass:$rs2)), + 
(!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX) + (fvti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), + (SplatFPOp (fvti.Scalar fpimm0)), + fvti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) + (fvti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), + (SplatFPOp fvti.ScalarRegClass:$rs1), + fvti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) + (fvti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs2, + (fvti.Scalar fvti.ScalarRegClass:$rs1), + (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), + fvti.RegClass:$rs1, + fvti.RegClass:$rs2, + fvti.RegClass:$passthru, + VLOpFrag)), + (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX) + fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm), + GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), + (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))), + fvti.RegClass:$rs2, + fvti.RegClass:$passthru, + VLOpFrag)), + (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX) + fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), + GPR:$vl, fvti.Log2SEW)>; + + + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), + (SplatFPOp (fvti.Scalar fpimm0)), + fvti.RegClass:$rs2, + fvti.RegClass:$passthru, + VLOpFrag)), + (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) + fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), + GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), + (SplatFPOp fvti.ScalarRegClass:$rs1), + fvti.RegClass:$rs2, + fvti.RegClass:$passthru, + VLOpFrag)), + (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) + fvti.RegClass:$passthru, fvti.RegClass:$rs2, + (fvti.Scalar fvti.ScalarRegClass:$rs1), + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector + (riscv_vrgather_vv_vl fvti.RegClass:$rs2, + (ivti.Vector fvti.RegClass:$rs1), + fvti.RegClass:$passthru, + (fvti.Mask VMV0:$vm), + VLOpFrag)), + (!cast<Instruction>("PseudoVRGATHER_VV_"# fvti.LMul.MX#"_E"# fvti.SEW#"_MASK") + fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(fvti.Vector (riscv_vrgather_vx_vl fvti.RegClass:$rs2, GPR:$rs1, + fvti.RegClass:$passthru, + (fvti.Mask VMV0:$vm), + VLOpFrag)), + (!cast<Instruction>("PseudoVRGATHER_VX_"# fvti.LMul.MX#"_MASK") + fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$rs1, + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(fvti.Vector + (riscv_vrgather_vx_vl fvti.RegClass:$rs2, + uimm5:$imm, + fvti.RegClass:$passthru, + (fvti.Mask VMV0:$vm), + VLOpFrag)), + (!cast<Instruction>("PseudoVRGATHER_VI_"# fvti.LMul.MX#"_MASK") + fvti.RegClass:$passthru, fvti.RegClass:$rs2, uimm5:$imm, + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; + } } let Predicates = [HasStdExtZvfbfwma] in { @@ -118,3 +572,224 @@ let Predicates = [HasStdExtZvfbfwma] in { defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACCBF16", AllWidenableBF16ToFloatVectors>; } + +multiclass VPatConversionVI_VF_BF16<string intrinsic, string instruction> { + foreach fvti = AllBF16Vectors in { + defvar ivti = 
GetIntVTypeInfo<fvti>.Vti; + let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, + GetVTypePredicates<ivti>.Predicates) in + defm : VPatConversion<intrinsic, instruction, "V", + ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW, + fvti.LMul, ivti.RegClass, fvti.RegClass>; + } +} + +multiclass VPatConversionWF_VI_BF16<string intrinsic, string instruction, + bit isSEWAware = 0> { + foreach vtiToWti = AllWidenableIntToBF16Vectors in { + defvar vti = vtiToWti.Vti; + defvar fwti = vtiToWti.Wti; + let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates, + GetVTypePredicates<fwti>.Predicates) in + defm : VPatConversion<intrinsic, instruction, "V", + fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW, + vti.LMul, fwti.RegClass, vti.RegClass, isSEWAware>; + } +} + +multiclass VPatConversionWF_VF_BF16<string intrinsic, string instruction, + bit isSEWAware = 0> { + foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + let Predicates = !listconcat(GetVTypeMinimalPredicates<fvti>.Predicates, + GetVTypeMinimalPredicates<fwti>.Predicates) in + defm : VPatConversion<intrinsic, instruction, "V", + fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW, + fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>; + } +} + +multiclass VPatConversionVI_WF_BF16<string intrinsic, string instruction> { + foreach vtiToWti = AllWidenableIntToBF16Vectors in { + defvar vti = vtiToWti.Vti; + defvar fwti = vtiToWti.Wti; + let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates, + GetVTypePredicates<fwti>.Predicates) in + defm : VPatConversion<intrinsic, instruction, "W", + vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW, + vti.LMul, vti.RegClass, fwti.RegClass>; + } +} + +multiclass VPatConversionVI_WF_RM_BF16<string intrinsic, string instruction> { + foreach vtiToWti = AllWidenableIntToBF16Vectors in { + defvar vti = vtiToWti.Vti; + defvar fwti = vtiToWti.Wti; + let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates, + GetVTypePredicates<fwti>.Predicates) in + defm : VPatConversionRoundingMode<intrinsic, instruction, "W", + vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW, + vti.LMul, vti.RegClass, fwti.RegClass>; + } +} + +multiclass VPatConversionVF_WF_BF16<string intrinsic, string instruction, + bit isSEWAware = 0> { + foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, + GetVTypePredicates<fwti>.Predicates) in + defm : VPatConversion<intrinsic, instruction, "W", + fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW, + fvti.LMul, fvti.RegClass, fwti.RegClass, isSEWAware>; + } +} + +let Predicates = [HasStdExtZvfbfa] in { +defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfadd", "PseudoVFADD_ALT", + AllBF16Vectors, isSEWAware = 1>; +defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfsub", "PseudoVFSUB_ALT", + AllBF16Vectors, isSEWAware = 1>; +defm : VPatBinaryV_VX_RM<"int_riscv_vfrsub", "PseudoVFRSUB_ALT", + AllBF16Vectors, isSEWAware = 1>; +defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwadd", "PseudoVFWADD_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; +defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwsub", "PseudoVFWSUB_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; +defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwadd_w", "PseudoVFWADD_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; +defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; 
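Several of the riscv_vfmv_v_f_vl patterns further down in this file splat fpimm0 through a plain integer vector move (PseudoVMV_V_I with immediate 0), which is only sound because +0.0 encodes as all-zero bits in bfloat16 while -0.0 does not. A quick stand-alone check of that encoding assumption against the APFloat API (illustrative only, not part of the patch):

#include "llvm/ADT/APFloat.h"
#include <cassert>
using namespace llvm;

int main() {
  // +0.0 in bfloat16 is the all-zero bit pattern, so an integer move of
  // zero materializes the same value the FP splat would.
  APFloat Zero = APFloat::getZero(APFloat::BFloat(), /*Negative=*/false);
  assert(Zero.bitcastToAPInt().isZero());
  // -0.0 has the sign bit set and is therefore not covered by this trick.
  APFloat NegZero = APFloat::getZero(APFloat::BFloat(), /*Negative=*/true);
  assert(!NegZero.bitcastToAPInt().isZero());
  return 0;
}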
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwmul", "PseudoVFWMUL_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; +defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmacc", "PseudoVFWMACC_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; +defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmacc", "PseudoVFWNMACC_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; +defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; +defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; +defm : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX_ALT", + AllBF16Vectors, isSEWAware=1>; +defm : VPatBinaryM_VV_VX<"int_riscv_vmfeq", "PseudoVMFEQ_ALT", AllBF16Vectors>; +defm : VPatBinaryM_VV_VX<"int_riscv_vmfle", "PseudoVMFLE_ALT", AllBF16Vectors>; +defm : VPatBinaryM_VV_VX<"int_riscv_vmflt", "PseudoVMFLT_ALT", AllBF16Vectors>; +defm : VPatBinaryM_VV_VX<"int_riscv_vmfne", "PseudoVMFNE_ALT", AllBF16Vectors>; +defm : VPatBinaryM_VX<"int_riscv_vmfgt", "PseudoVMFGT_ALT", AllBF16Vectors>; +defm : VPatBinaryM_VX<"int_riscv_vmfge", "PseudoVMFGE_ALT", AllBF16Vectors>; +defm : VPatBinarySwappedM_VV<"int_riscv_vmfgt", "PseudoVMFLT_ALT", AllBF16Vectors>; +defm : VPatBinarySwappedM_VV<"int_riscv_vmfge", "PseudoVMFLE_ALT", AllBF16Vectors>; +defm : VPatConversionVI_VF_BF16<"int_riscv_vfclass", "PseudoVFCLASS_ALT">; +foreach vti = AllBF16Vectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in + defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE_ALT", + "V"#vti.ScalarSuffix#"M", + vti.Vector, + vti.Vector, vti.Scalar, vti.Mask, + vti.Log2SEW, vti.LMul, vti.RegClass, + vti.RegClass, vti.ScalarRegClass>; +} +defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU_ALT", + isSEWAware=1>; +defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X_ALT", + isSEWAware=1>; +defm : 
VPatConversionWF_VF_BF16<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F_ALT", + isSEWAware=1>; +defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F_ALT">; +defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F_ALT">; +defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F_ALT">; +defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F_ALT">; +defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F_ALT", + AllWidenableBF16ToFloatVectors, isSEWAware=1>; +defm : VPatConversionVF_WF_BF16<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F_ALT", + isSEWAware=1>; +defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP_ALT", AllBF16Vectors>; +defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN_ALT", AllBF16Vectors>; + +foreach fvti = AllBF16Vectors in { + defvar ivti = GetIntVTypeInfo<fvti>.Vti; + let Predicates = GetVTypePredicates<ivti>.Predicates in { + // 13.16. Vector Floating-Point Move Instruction + // If we're splatting fpimm0, use vmv.v.x vd, x0. + def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl + fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)), + (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX) + $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>; + def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl + fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)), + (!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX) + $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>; + } + + let Predicates = GetVTypePredicates<fvti>.Predicates in { + def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl + fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), + (!cast<Instruction>("PseudoVFMV_V_ALT_" # fvti.ScalarSuffix # "_" # + fvti.LMul.MX) + $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), + GPR:$vl, fvti.Log2SEW, TU_MU)>; + } +} + +foreach vti = NoGroupBF16Vectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), + (vti.Scalar (fpimm0)), + VLOpFrag)), + (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), + (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), + VLOpFrag)), + (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), + vti.ScalarRegClass:$rs1, + VLOpFrag)), + (!cast<Instruction>("PseudoVFMV_S_"#vti.ScalarSuffix#"_ALT") + vti.RegClass:$passthru, + (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>; + } + + defvar vfmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_", + vti.ScalarSuffix, + "_S_ALT")); + // Only pattern-match extract-element operations where the index is 0. Any + // other index will have been custom-lowered to slide the vector correctly + // into place. 
+ let Predicates = GetVTypePredicates<vti>.Predicates in + def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)), + (vfmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>; +} +} // Predicates = [HasStdExtZvfbfa] diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 6acf799..334db4b 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -288,9 +288,12 @@ public: bool hasVInstructionsI64() const { return HasStdExtZve64x; } bool hasVInstructionsF16Minimal() const { return HasStdExtZvfhmin; } bool hasVInstructionsF16() const { return HasStdExtZvfh; } - bool hasVInstructionsBF16Minimal() const { return HasStdExtZvfbfmin; } + bool hasVInstructionsBF16Minimal() const { + return HasStdExtZvfbfmin || HasStdExtZvfbfa; + } bool hasVInstructionsF32() const { return HasStdExtZve32f; } bool hasVInstructionsF64() const { return HasStdExtZve64d; } + bool hasVInstructionsBF16() const { return HasStdExtZvfbfa; } // F16 and F64 both require F32. bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); } bool hasVInstructionsFullMultiply() const { return HasStdExtV; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2feee05..b05d7c7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44813,10 +44813,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( } case X86ISD::PCMPGT: // icmp sgt(0, R) == ashr(R, BitWidth-1). - // iff we only need the sign bit then we can use R directly. - if (OriginalDemandedBits.isSignMask() && - ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) - return TLO.CombineTo(Op, Op.getOperand(1)); + if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) { + // iff we only need the signbit then we can use R directly. + if (OriginalDemandedBits.isSignMask()) + return TLO.CombineTo(Op, Op.getOperand(1)); + // otherwise we just need R's signbit for the comparison. + APInt SignMask = APInt::getSignMask(BitWidth); + if (SimplifyDemandedBits(Op.getOperand(1), SignMask, OriginalDemandedElts, + Known, TLO, Depth + 1)) + return true; + } break; case X86ISD::MOVMSK: { SDValue Src = Op.getOperand(0); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 09cb225..a8eb9b9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3757,6 +3757,10 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder, // (x < y) ? -1 : zext(x > y) // (x > y) ? 1 : sext(x != y) // (x > y) ? 1 : sext(x < y) +// (x == y) ? 0 : (x > y ? 1 : -1) +// (x == y) ? 0 : (x < y ? -1 : 1) +// Special case: x == C ? 0 : (x > C - 1 ? 1 : -1) +// Special case: x == C ? 0 : (x < C + 1 ? -1 : 1) // Into ucmp/scmp(x, y), where signedness is determined by the signedness // of the comparison in the original sequence. Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) { @@ -3849,6 +3853,44 @@ Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) { } } + // Special cases with constants: x == C ? 0 : (x > C-1 ? 
1 : -1) + if (Pred == ICmpInst::ICMP_EQ && match(TV, m_Zero())) { + const APInt *C; + if (match(RHS, m_APInt(C))) { + CmpPredicate InnerPred; + Value *InnerRHS; + const APInt *InnerTV, *InnerFV; + if (match(FV, + m_Select(m_ICmp(InnerPred, m_Specific(LHS), m_Value(InnerRHS)), + m_APInt(InnerTV), m_APInt(InnerFV)))) { + + // x == C ? 0 : (x > C-1 ? 1 : -1) + if (ICmpInst::isGT(InnerPred) && InnerTV->isOne() && + InnerFV->isAllOnes()) { + IsSigned = ICmpInst::isSigned(InnerPred); + bool CanSubOne = IsSigned ? !C->isMinSignedValue() : !C->isMinValue(); + if (CanSubOne) { + APInt Cminus1 = *C - 1; + if (match(InnerRHS, m_SpecificInt(Cminus1))) + Replace = true; + } + } + + // x == C ? 0 : (x < C+1 ? -1 : 1) + if (ICmpInst::isLT(InnerPred) && InnerTV->isAllOnes() && + InnerFV->isOne()) { + IsSigned = ICmpInst::isSigned(InnerPred); + bool CanAddOne = IsSigned ? !C->isMaxSignedValue() : !C->isMaxValue(); + if (CanAddOne) { + APInt Cplus1 = *C + 1; + if (match(InnerRHS, m_SpecificInt(Cplus1))) + Replace = true; + } + } + } + } + } + Intrinsic::ID IID = IsSigned ? Intrinsic::scmp : Intrinsic::ucmp; if (Replace) return replaceInstUsesWith( diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 9693ae6..4947d03 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/ValueLatticeUtils.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" @@ -634,18 +635,10 @@ private: /// Merge \p MergeWithV into \p IV and push \p V to the worklist, if \p IV /// changes. bool mergeInValue(ValueLatticeElement &IV, Value *V, - ValueLatticeElement MergeWithV, + const ValueLatticeElement &MergeWithV, ValueLatticeElement::MergeOptions Opts = { /*MayIncludeUndef=*/false, /*CheckWiden=*/false}); - bool mergeInValue(Value *V, ValueLatticeElement MergeWithV, - ValueLatticeElement::MergeOptions Opts = { - /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) { - assert(!V->getType()->isStructTy() && - "non-structs should use markConstant"); - return mergeInValue(ValueState[V], V, MergeWithV, Opts); - } - /// getValueState - Return the ValueLatticeElement object that corresponds to /// the value. This function handles the case when the value hasn't been seen /// yet by properly seeding constants etc. 
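To make the InstCombineSelect.cpp change above concrete, here is a minimal IR sketch, illustrative only and not part of the patch (function name hypothetical), using C = 10 so the inner compare is against C - 1 = 9:

  define i8 @src(i32 %x) {
    %eq = icmp eq i32 %x, 10
    %gt = icmp sgt i32 %x, 9
    %inner = select i1 %gt, i8 1, i8 -1
    %r = select i1 %eq, i8 0, i8 %inner
    ret i8 %r
  }

For %x != 10, "%x sgt 9" is equivalent to "%x sgt 10", so the whole tree can fold to %r = call i8 @llvm.scmp.i8.i32(i32 %x, i32 10). The CanSubOne/CanAddOne guards skip the rewrite when C - 1 or C + 1 would wrap, i.e. when C is the signed or unsigned extreme for the inner predicate's signedness.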
@@ -768,6 +761,7 @@ private: void handleCallArguments(CallBase &CB); void handleExtractOfWithOverflow(ExtractValueInst &EVI, const WithOverflowInst *WO, unsigned Idx); + bool isInstFullyOverDefined(Instruction &Inst); private: friend class InstVisitor<SCCPInstVisitor>; @@ -987,7 +981,7 @@ public: void trackValueOfArgument(Argument *A) { if (A->getType()->isStructTy()) return (void)markOverdefined(A); - mergeInValue(A, getArgAttributeVL(A)); + mergeInValue(ValueState[A], A, getArgAttributeVL(A)); } bool isStructLatticeConstant(Function *F, StructType *STy); @@ -1128,8 +1122,7 @@ bool SCCPInstVisitor::isStructLatticeConstant(Function *F, StructType *STy) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i)); assert(It != TrackedMultipleRetVals.end()); - ValueLatticeElement LV = It->second; - if (!SCCPSolver::isConstant(LV)) + if (!SCCPSolver::isConstant(It->second)) return false; } return true; @@ -1160,7 +1153,7 @@ Constant *SCCPInstVisitor::getConstantOrNull(Value *V) const { std::vector<Constant *> ConstVals; auto *ST = cast<StructType>(V->getType()); for (unsigned I = 0, E = ST->getNumElements(); I != E; ++I) { - ValueLatticeElement LV = LVs[I]; + const ValueLatticeElement &LV = LVs[I]; ConstVals.push_back(SCCPSolver::isConstant(LV) ? getConstant(LV, ST->getElementType(I)) : UndefValue::get(ST->getElementType(I))); @@ -1225,7 +1218,7 @@ void SCCPInstVisitor::visitInstruction(Instruction &I) { } bool SCCPInstVisitor::mergeInValue(ValueLatticeElement &IV, Value *V, - ValueLatticeElement MergeWithV, + const ValueLatticeElement &MergeWithV, ValueLatticeElement::MergeOptions Opts) { if (IV.mergeIn(MergeWithV, Opts)) { pushUsersToWorkList(V); @@ -1264,7 +1257,7 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI, return; } - ValueLatticeElement BCValue = getValueState(BI->getCondition()); + const ValueLatticeElement &BCValue = getValueState(BI->getCondition()); ConstantInt *CI = getConstantInt(BCValue, BI->getCondition()->getType()); if (!CI) { // Overdefined condition variables, and branches on unfoldable constant @@ -1326,7 +1319,7 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI, // the target as executable. if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) { // Casts are folded by visitCastInst. - ValueLatticeElement IBRValue = getValueState(IBR->getAddress()); + const ValueLatticeElement &IBRValue = getValueState(IBR->getAddress()); BlockAddress *Addr = dyn_cast_or_null<BlockAddress>( getConstant(IBRValue, IBR->getAddress()->getType())); if (!Addr) { // Overdefined or unknown condition? @@ -1383,49 +1376,66 @@ bool SCCPInstVisitor::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const { // 7. If a conditional branch has a value that is overdefined, make all // successors executable. void SCCPInstVisitor::visitPHINode(PHINode &PN) { - // If this PN returns a struct, just mark the result overdefined. - // TODO: We could do a lot better than this if code actually uses this. - if (PN.getType()->isStructTy()) - return (void)markOverdefined(&PN); - - if (getValueState(&PN).isOverdefined()) - return; // Quick exit - // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant, // and slow us down a lot. Just mark them overdefined. 
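 // (With the struct-aware merging added below, a single visit can perform one // merge per struct element per feasible incoming edge, so this cap also // bounds that work.)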
if (PN.getNumIncomingValues() > 64) return (void)markOverdefined(&PN); - unsigned NumActiveIncoming = 0; + if (isInstFullyOverDefined(PN)) + return; + SmallVector<unsigned> FeasibleIncomingIndices; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) + continue; + FeasibleIncomingIndices.push_back(i); + } // Look at all of the executable operands of the PHI node. If any of them // are overdefined, the PHI becomes overdefined as well. If they are all // constant, and they agree with each other, the PHI becomes the identical // constant. If they are constant and don't agree, the PHI is a constant // range. If there are no executable operands, the PHI remains unknown. - ValueLatticeElement PhiState = getValueState(&PN); - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) - continue; - - ValueLatticeElement IV = getValueState(PN.getIncomingValue(i)); - PhiState.mergeIn(IV); - NumActiveIncoming++; - if (PhiState.isOverdefined()) - break; + if (StructType *STy = dyn_cast<StructType>(PN.getType())) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + ValueLatticeElement PhiState = getStructValueState(&PN, i); + if (PhiState.isOverdefined()) + continue; + for (unsigned j : FeasibleIncomingIndices) { + const ValueLatticeElement &IV = + getStructValueState(PN.getIncomingValue(j), i); + PhiState.mergeIn(IV); + if (PhiState.isOverdefined()) + break; + } + ValueLatticeElement &PhiStateRef = getStructValueState(&PN, i); + mergeInValue(PhiStateRef, &PN, PhiState, + ValueLatticeElement::MergeOptions().setMaxWidenSteps( + FeasibleIncomingIndices.size() + 1)); + PhiStateRef.setNumRangeExtensions( + std::max((unsigned)FeasibleIncomingIndices.size(), + PhiStateRef.getNumRangeExtensions())); + } + } else { + ValueLatticeElement PhiState = getValueState(&PN); + for (unsigned i : FeasibleIncomingIndices) { + const ValueLatticeElement &IV = getValueState(PN.getIncomingValue(i)); + PhiState.mergeIn(IV); + if (PhiState.isOverdefined()) + break; + } + // We allow up to 1 range extension per active incoming value and one + // additional extension. Note that we manually adjust the number of range + // extensions to match the number of active incoming values. This helps to + // limit multiple extensions caused by the same incoming value, if other + // incoming values are equal. + ValueLatticeElement &PhiStateRef = ValueState[&PN]; + mergeInValue(PhiStateRef, &PN, PhiState, + ValueLatticeElement::MergeOptions().setMaxWidenSteps( + FeasibleIncomingIndices.size() + 1)); + PhiStateRef.setNumRangeExtensions( + std::max((unsigned)FeasibleIncomingIndices.size(), + PhiStateRef.getNumRangeExtensions())); } - - // We allow up to 1 range extension per active incoming value and one - // additional extension. Note that we manually adjust the number of range - // extensions to match the number of active incoming values. This helps to - // limit multiple extensions caused by the same incoming value, if other - // incoming values are equal. 
- mergeInValue(&PN, PhiState, - ValueLatticeElement::MergeOptions().setMaxWidenSteps( - NumActiveIncoming + 1)); - ValueLatticeElement &PhiStateRef = getValueState(&PN); - PhiStateRef.setNumRangeExtensions( - std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions())); } void SCCPInstVisitor::visitReturnInst(ReturnInst &I) { @@ -1481,7 +1491,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) { } } - ValueLatticeElement OpSt = getValueState(I.getOperand(0)); + const ValueLatticeElement &OpSt = getValueState(I.getOperand(0)); if (OpSt.isUnknownOrUndef()) return; @@ -1496,9 +1506,9 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) { if (I.getDestTy()->isIntOrIntVectorTy() && I.getSrcTy()->isIntOrIntVectorTy() && I.getOpcode() != Instruction::BitCast) { - auto &LV = getValueState(&I); ConstantRange OpRange = OpSt.asConstantRange(I.getSrcTy(), /*UndefAllowed=*/false); + auto &LV = getValueState(&I); Type *DestTy = I.getDestTy(); ConstantRange Res = ConstantRange::getEmpty(DestTy->getScalarSizeInBits()); @@ -1516,19 +1526,24 @@ void SCCPInstVisitor::handleExtractOfWithOverflow(ExtractValueInst &EVI, const WithOverflowInst *WO, unsigned Idx) { Value *LHS = WO->getLHS(), *RHS = WO->getRHS(); - ValueLatticeElement L = getValueState(LHS); - ValueLatticeElement R = getValueState(RHS); + Type *Ty = LHS->getType(); + addAdditionalUser(LHS, &EVI); addAdditionalUser(RHS, &EVI); - if (L.isUnknownOrUndef() || R.isUnknownOrUndef()) - return; // Wait to resolve. - Type *Ty = LHS->getType(); + const ValueLatticeElement &L = getValueState(LHS); + if (L.isUnknownOrUndef()) + return; // Wait to resolve. ConstantRange LR = L.asConstantRange(Ty, /*UndefAllowed=*/false); + + const ValueLatticeElement &R = getValueState(RHS); + if (R.isUnknownOrUndef()) + return; // Wait to resolve. + ConstantRange RR = R.asConstantRange(Ty, /*UndefAllowed=*/false); if (Idx == 0) { ConstantRange Res = LR.binaryOp(WO->getBinaryOp(), RR); - mergeInValue(&EVI, ValueLatticeElement::getRange(Res)); + mergeInValue(ValueState[&EVI], &EVI, ValueLatticeElement::getRange(Res)); } else { assert(Idx == 1 && "Index can only be 0 or 1"); ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion( @@ -1560,7 +1575,7 @@ void SCCPInstVisitor::visitExtractValueInst(ExtractValueInst &EVI) { if (auto *WO = dyn_cast<WithOverflowInst>(AggVal)) return handleExtractOfWithOverflow(EVI, WO, i); ValueLatticeElement EltVal = getStructValueState(AggVal, i); - mergeInValue(getValueState(&EVI), &EVI, EltVal); + mergeInValue(ValueState[&EVI], &EVI, EltVal); } else { // Otherwise, must be extracting from an array. return (void)markOverdefined(&EVI); @@ -1616,14 +1631,18 @@ void SCCPInstVisitor::visitSelectInst(SelectInst &I) { if (ValueState[&I].isOverdefined()) return (void)markOverdefined(&I); - ValueLatticeElement CondValue = getValueState(I.getCondition()); + const ValueLatticeElement &CondValue = getValueState(I.getCondition()); if (CondValue.isUnknownOrUndef()) return; if (ConstantInt *CondCB = getConstantInt(CondValue, I.getCondition()->getType())) { Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue(); - mergeInValue(&I, getValueState(OpVal)); + const ValueLatticeElement &OpValState = getValueState(OpVal); + // Safety: ValueState[&I] doesn't invalidate OpValState since it is already + // in the map. 
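+ // (DenseMap::operator[] can insert and rehash, invalidating references such + // as OpValState; here &I is already present, inserted by the + // ValueState[&I].isOverdefined() check at the top of this function, so the + // lookup below cannot rehash.)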
+ assert(ValueState.contains(&I) && "&I is not in ValueState map."); + mergeInValue(ValueState[&I], &I, OpValState); return; } @@ -1721,7 +1740,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { // being a special floating value. ValueLatticeElement NewV; NewV.markConstant(C, /*MayIncludeUndef=*/true); - return (void)mergeInValue(&I, NewV); + return (void)mergeInValue(ValueState[&I], &I, NewV); } } @@ -1741,7 +1760,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { R = A.overflowingBinaryOp(BO->getOpcode(), B, OBO->getNoWrapKind()); else R = A.binaryOp(BO->getOpcode(), B); - mergeInValue(&I, ValueLatticeElement::getRange(R)); + mergeInValue(ValueState[&I], &I, ValueLatticeElement::getRange(R)); // TODO: Currently we do not exploit special values that produce something // better than overdefined with an overdefined operand for vector or floating @@ -1767,7 +1786,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) { if (C) { ValueLatticeElement CV; CV.markConstant(C); - mergeInValue(&I, CV); + mergeInValue(ValueState[&I], &I, CV); return; } @@ -1802,7 +1821,7 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { Operands.reserve(I.getNumOperands()); for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - ValueLatticeElement State = getValueState(I.getOperand(i)); + const ValueLatticeElement &State = getValueState(I.getOperand(i)); if (State.isUnknownOrUndef()) return; // Operands are not resolved yet. @@ -1881,14 +1900,13 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) { if (ValueState[&I].isOverdefined()) return (void)markOverdefined(&I); - ValueLatticeElement PtrVal = getValueState(I.getOperand(0)); + const ValueLatticeElement &PtrVal = getValueState(I.getOperand(0)); if (PtrVal.isUnknownOrUndef()) return; // The pointer is not resolved yet! - ValueLatticeElement &IV = ValueState[&I]; - if (SCCPSolver::isConstant(PtrVal)) { Constant *Ptr = getConstant(PtrVal, I.getOperand(0)->getType()); + ValueLatticeElement &IV = ValueState[&I]; // load null is undefined. if (isa<ConstantPointerNull>(Ptr)) { @@ -1916,7 +1934,7 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) { } // Fall back to metadata. - mergeInValue(&I, getValueFromMetadata(&I)); + mergeInValue(ValueState[&I], &I, getValueFromMetadata(&I)); } void SCCPInstVisitor::visitCallBase(CallBase &CB) { @@ -1944,7 +1962,7 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) { return markOverdefined(&CB); // Can't handle struct args. if (A.get()->getType()->isMetadataTy()) continue; // Carried in CB, not allowed in Operands. - ValueLatticeElement State = getValueState(A); + const ValueLatticeElement &State = getValueState(A); if (State.isUnknownOrUndef()) return; // Operands are not resolved yet. @@ -1964,7 +1982,7 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) { } // Fall back to metadata. 
- mergeInValue(&CB, getValueFromMetadata(&CB)); + mergeInValue(ValueState[&CB], &CB, getValueFromMetadata(&CB)); } void SCCPInstVisitor::handleCallArguments(CallBase &CB) { @@ -1992,10 +2010,11 @@ void SCCPInstVisitor::handleCallArguments(CallBase &CB) { mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg, getMaxWidenStepsOpts()); } - } else - mergeInValue(&*AI, - getValueState(*CAI).intersect(getArgAttributeVL(&*AI)), - getMaxWidenStepsOpts()); + } else { + ValueLatticeElement CallArg = + getValueState(*CAI).intersect(getArgAttributeVL(&*AI)); + mergeInValue(ValueState[&*AI], &*AI, CallArg, getMaxWidenStepsOpts()); + } } } } @@ -2076,7 +2095,8 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { if (II->getIntrinsicID() == Intrinsic::vscale) { unsigned BitWidth = CB.getType()->getScalarSizeInBits(); const ConstantRange Result = getVScaleRange(II->getFunction(), BitWidth); - return (void)mergeInValue(II, ValueLatticeElement::getRange(Result)); + return (void)mergeInValue(ValueState[II], II, + ValueLatticeElement::getRange(Result)); } if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) { @@ -2094,7 +2114,8 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { ConstantRange Result = ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges); - return (void)mergeInValue(II, ValueLatticeElement::getRange(Result)); + return (void)mergeInValue(ValueState[II], II, + ValueLatticeElement::getRange(Result)); } } @@ -2121,10 +2142,25 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { return handleCallOverdefined(CB); // Not tracking this callee. // If so, propagate the return value of the callee into this call result. - mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts()); + mergeInValue(ValueState[&CB], &CB, TFRVI->second, getMaxWidenStepsOpts()); } } +bool SCCPInstVisitor::isInstFullyOverDefined(Instruction &Inst) { + // For struct types, we handle each member separately. + // A struct value is not considered overdefined as long as + // at least one member is not overdefined. + if (StructType *STy = dyn_cast<StructType>(Inst.getType())) { + for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) { + if (!getStructValueState(&Inst, i).isOverdefined()) + return false; + } + return true; + } + + return getValueState(&Inst).isOverdefined(); +} + void SCCPInstVisitor::solve() { // Process the work lists until they are empty! while (!BBWorkList.empty() || !InstWorkList.empty()) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b62c8f1..9cd52da 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2242,8 +2242,49 @@ public: /// may not be necessary. bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const; bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy, - Align Alignment, const int64_t Diff, Value *Ptr0, - Value *PtrN, StridedPtrInfo &SPtrInfo) const; + Align Alignment, const int64_t Diff, + const size_t Sz) const; + + /// Return true if an array of scalar loads can be replaced with a strided + /// load (with constant stride). + /// + /// TODO: + /// It is possible that the load gets "widened". Suppose that originally each + /// load loads `k` bytes and `PointerOps` can be arranged as follows (`%s` is + /// constant): + /// %b + 0 * %s + 0 + /// %b + 0 * %s + 1 + /// %b + 0 * %s + 2 + /// ... + /// %b + 0 * %s + (w - 1) + /// + /// %b + 1 * %s + 0 + /// %b + 1 * %s + 1 + /// %b + 1 * %s + 2 + /// ...
+ /// %b + 1 * %s + (w - 1) + /// ... + /// + /// %b + (n - 1) * %s + 0 + /// %b + (n - 1) * %s + 1 + /// %b + (n - 1) * %s + 2 + /// ... + /// %b + (n - 1) * %s + (w - 1) + /// + /// In this case we will generate a strided load of type `<n x (k * w)>`. + /// + /// \param PointerOps list of pointer arguments of loads. + /// \param ElemTy original scalar type of loads. + /// \param Alignment alignment of the first load. + /// \param SortedIndices is the order of PointerOps as returned by + /// `sortPtrAccesses` + /// \param Diff Pointer difference between the lowest and the highest pointer + /// in `PointerOps` as returned by `getPointersDiff`. + /// \param Ptr0 first pointer in `PointerOps`. + /// \param PtrN last pointer in `PointerOps`. + /// \param SPtrInfo If the function returns `true`, it also sets all the fields + /// of `SPtrInfo` necessary to generate the strided load later. + bool analyzeConstantStrideCandidate( + const ArrayRef<Value *> PointerOps, Type *ElemTy, Align Alignment, + const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff, + Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const; /// Return true if an array of scalar loads can be replaced with a strided /// load (with run-time stride). @@ -6849,9 +6890,8 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps, /// current graph (for masked gathers extra extractelement instructions /// might be required). bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy, - Align Alignment, const int64_t Diff, Value *Ptr0, - Value *PtrN, StridedPtrInfo &SPtrInfo) const { - const size_t Sz = PointerOps.size(); + Align Alignment, const int64_t Diff, + const size_t Sz) const { if (Diff % (Sz - 1) != 0) return false; @@ -6875,27 +6915,40 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy, return false; if (!TTI->isLegalStridedLoadStore(VecTy, Alignment)) return false; + return true; + } + return false; +} - // Iterate through all pointers and check if all distances are - // unique multiple of Dist. - SmallSet<int64_t, 4> Dists; - for (Value *Ptr : PointerOps) { - int64_t Dist = 0; - if (Ptr == PtrN) - Dist = Diff; - else if (Ptr != Ptr0) - Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE); - // If the strides are not the same or repeated, we can't - // vectorize. - if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second) - break; - } - if (Dists.size() == Sz) { - Type *StrideTy = DL->getIndexType(Ptr0->getType()); - SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride); - SPtrInfo.Ty = getWidenedType(ScalarTy, Sz); - return true; - } +bool BoUpSLP::analyzeConstantStrideCandidate( + const ArrayRef<Value *> PointerOps, Type *ScalarTy, Align Alignment, + const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff, + Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const { + const size_t Sz = PointerOps.size(); + if (!isStridedLoad(PointerOps, ScalarTy, Alignment, Diff, Sz)) + return false; + + int64_t Stride = Diff / static_cast<int64_t>(Sz - 1); + + // Iterate through all pointers and check if all distances are + // unique multiples of Stride. + SmallSet<int64_t, 4> Dists; + for (Value *Ptr : PointerOps) { + int64_t Dist = 0; + if (Ptr == PtrN) + Dist = Diff; + else if (Ptr != Ptr0) + Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE); + // If the strides are not the same or repeated, we can't + // vectorize.
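+ // Worked example (not part of the original patch): four pointers at element + // offsets 0, 8, 16, 24 from Ptr0 give Diff = 24 and Sz = 4, hence Stride = 8; + // Dists collects {0, 8, 16, 24}, i.e. Sz unique multiples of Stride, so the + // check below succeeds and the candidate is accepted.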
+ if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second) + break; + } + if (Dists.size() == Sz) { + Type *StrideTy = DL->getIndexType(Ptr0->getType()); + SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride); + SPtrInfo.Ty = getWidenedType(ScalarTy, Sz); + return true; } return false; } @@ -6995,8 +7048,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( Align Alignment = cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()]) ->getAlign(); - if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN, - SPtrInfo)) + if (analyzeConstantStrideCandidate(PointerOps, ScalarTy, Alignment, Order, + *Diff, Ptr0, PtrN, SPtrInfo)) return LoadsState::StridedVectorize; } if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) || diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0e0b042..84d2ea6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -407,6 +407,10 @@ public: VPBasicBlock *getParent() { return Parent; } const VPBasicBlock *getParent() const { return Parent; } + /// \return the VPRegionBlock which the recipe belongs to. + VPRegionBlock *getRegion(); + const VPRegionBlock *getRegion() const; + /// The method which generates the output IR instructions that correspond to /// this VPRecipe, thereby "executing" the VPlan. virtual void execute(VPTransformState &State) = 0; @@ -4075,6 +4079,14 @@ public: } }; +inline VPRegionBlock *VPRecipeBase::getRegion() { + return getParent()->getParent(); +} + +inline const VPRegionBlock *VPRecipeBase::getRegion() const { + return getParent()->getParent(); +} + /// VPlan models a candidate for vectorization, encoding various decisions taken /// to produce efficient output IR, including which branches, basic-blocks and /// output IR instructions to generate, and their cost.
VPlan holds a diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index f413c63..7e074c1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -377,7 +377,7 @@ bool VPDominatorTree::properlyDominates(const VPRecipeBase *A, #ifndef NDEBUG auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { - auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent()); + VPRegionBlock *Region = R->getRegion(); if (Region && Region->isReplicator()) { assert(Region->getNumSuccessors() == 1 && Region->getNumPredecessors() == 1 && "Expected SESE region!"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 7a98c75..d1e67e6b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2352,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const { return false; auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue()); auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); - auto *CanIV = getParent()->getParent()->getCanonicalIV(); + auto *CanIV = getRegion()->getCanonicalIV(); return StartC && StartC->isZero() && StepC && StepC->isOne() && getScalarType() == CanIV->getScalarType(); } @@ -3076,7 +3076,7 @@ static void scalarizeInstruction(const Instruction *Instr, State.AC->registerAssumption(II); assert( - (RepRecipe->getParent()->getParent() || + (RepRecipe->getRegion() || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() || all_of(RepRecipe->operands(), [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) && @@ -3268,7 +3268,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, to_vector(operands()), VF); // If the recipe is not predicated (i.e. not in a replicate region), return // the scalar cost. Otherwise handle predicated cost. - if (!getParent()->getParent()->isReplicator()) + if (!getRegion()->isReplicator()) return ScalarCost; // Account for the phi nodes that we will create. @@ -3284,7 +3284,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, case Instruction::Store: { // TODO: See getMemInstScalarizationCost for how to handle replicating and // predicated cases. - const VPRegionBlock *ParentRegion = getParent()->getParent(); + const VPRegionBlock *ParentRegion = getRegion(); if (ParentRegion && ParentRegion->isReplicator()) break; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index cae9aee8..f5f616f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1858,8 +1858,8 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, return nullptr; VPRegionBlock *EnclosingLoopRegion = HoistCandidate->getParent()->getEnclosingLoopRegion(); - assert((!HoistCandidate->getParent()->getParent() || - HoistCandidate->getParent()->getParent() == EnclosingLoopRegion) && + assert((!HoistCandidate->getRegion() || + HoistCandidate->getRegion() == EnclosingLoopRegion) && "CFG in VPlan should still be flat, without replicate regions"); // Hoist candidate was already visited, no need to hoist. if (!Visited.insert(HoistCandidate).second) @@ -2898,7 +2898,7 @@ void VPlanTransforms::replaceSymbolicStrides( // evolution. 
auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) { auto *R = cast<VPRecipeBase>(&U); - return R->getParent()->getParent() || + return R->getRegion() || R->getParent() == Plan.getVectorLoopRegion()->getSinglePredecessor(); }; ValueToSCEVMapTy RewriteMap; @@ -3803,8 +3803,7 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) { continue; auto *DefR = cast<VPRecipeWithIRFlags>(&R); auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) { - VPRegionBlock *ParentRegion = - cast<VPRecipeBase>(U)->getParent()->getParent(); + VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion(); return !U->usesScalars(DefR) || ParentRegion != LoopRegion; }; if ((isa<VPReplicateRecipe>(DefR) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index cf95ac0..9a2497e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -64,7 +64,7 @@ inline bool isSingleScalar(const VPValue *VPV) { return true; if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) { - const VPRegionBlock *RegionOfR = Rep->getParent()->getParent(); + const VPRegionBlock *RegionOfR = Rep->getRegion(); // Don't consider recipes in replicate regions as uniform yet; their first // lane cannot be accessed when executing the replicate region for other // lanes. diff --git a/llvm/test/CodeGen/AMDGPU/abs_i32.ll b/llvm/test/CodeGen/AMDGPU/abs_i32.ll new file mode 100644 index 0000000..b53047f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/abs_i32.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefixes=R600 %s + +define amdgpu_kernel void @abs_v1(ptr addrspace(1) %out, i32 %arg) { +; GFX9-LABEL: abs_v1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_abs_i32 s2, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; R600-LABEL: abs_v1: +; R600: ; %bb.0: +; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: MOV * T0.W, KC0[2].Z, +; R600-NEXT: SUB_INT * T1.W, 0.0, PV.W, +; R600-NEXT: MAX_INT T0.X, T0.W, PV.W, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %res = call i32 @llvm.abs.i32(i32 %arg, i1 false) + store i32 %res, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @abs_v2(ptr addrspace(1) %out, i32 %arg) { +; GFX9-LABEL: abs_v2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_abs_i32 s2, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; R600-LABEL: abs_v2: +; R600: ; %bb.0: +; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z, +; R600-NEXT: MAX_INT T0.X, KC0[2].Z, PV.W, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %neg = sub i32 0, %arg + %cond = icmp sgt i32 %arg, %neg + %res = select i1 %cond, i32 %arg, i32 %neg + store i32 %res, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @abs_v3(ptr addrspace(1) %out, i32 %arg) { +; GFX9-LABEL: abs_v3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_abs_i32 s2, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; R600-LABEL: abs_v3: +; R600: ; %bb.0: +; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z, +; R600-NEXT: MAX_INT T0.X, PV.W, KC0[2].Z, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %neg = sub i32 0, %arg + %cond = icmp sgt i32 %neg, %arg + %res = select i1 %cond, i32 %neg, i32 %arg + store i32 %res, ptr addrspace(1) %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll b/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll new file mode 100644 index 0000000..489323b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half>, + <vscale x 1 x half>, + <vscale x 1 x half>, + iXLen, iXLen); + +declare <vscale x 1 x i32> @llvm.riscv.vadd.nxv1i32.nxv1i32( + <vscale x 1 x i32>, + <vscale x 1 x i32>, + <vscale x 1 x i32>, + iXLen); + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @test_half_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_half_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vse16.v v10, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half> poison, + <vscale x 1 x half> %3, + <vscale x 1 x half> %4, + iXLen 0, iXLen %2) + + %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + call void @llvm.riscv.vse(<vscale x 1 x half> %a, ptr %ptr, iXLen %2) + + ret <vscale x 1 x bfloat> %b +} + +define <vscale x 1 x bfloat> @test_i32_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x i32> %3, <vscale x 1 x i32> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_i32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli 
zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vadd.vv v10, v10, v11 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vse32.v v10, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vadd.nxv1i32.nxv1i32( + <vscale x 1 x i32> poison, + <vscale x 1 x i32> %3, + <vscale x 1 x i32> %4, + iXLen %2) + + %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + call void @llvm.riscv.vse(<vscale x 1 x i32> %a, ptr %ptr, iXLen %2) + + ret <vscale x 1 x bfloat> %b +} + +define <vscale x 1 x bfloat> @test_half_bf16_half(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_half_bf16_half: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v11 +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v9, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half> poison, + <vscale x 1 x half> %3, + <vscale x 1 x half> %4, + iXLen 0, iXLen %2) + + %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + %c = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half> poison, + <vscale x 1 x half> %a, + <vscale x 1 x half> %4, + iXLen 0, iXLen %2) + + store <vscale x 1 x half> %c, ptr %ptr + + ret <vscale x 1 x bfloat> %b +} + +define <vscale x 1 x bfloat> @test_bf16_half_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_bf16_half_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vse16.v v10, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + %b = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half> poison, + <vscale x 1 x half> %3, + <vscale x 1 x half> %4, + iXLen 0, iXLen %2) + + %c = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %a, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + store <vscale x 1 x half> %b, ptr %ptr + + ret <vscale x 1 x bfloat> %c +} + +define <vscale x 1 x bfloat> @test_bf16_i16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x i16> %3, <vscale x 1 x i16> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_bf16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a2, 0 +; 
CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vadd.vv v9, v10, v11 +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vse16.v v9, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + %b = call <vscale x 1 x i16> @llvm.riscv.vadd.nxv1i16.nxv1i16( + <vscale x 1 x i16> poison, + <vscale x 1 x i16> %3, + <vscale x 1 x i16> %4, + iXLen %2) + + store <vscale x 1 x i16> %b, ptr %ptr + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll new file mode 100644 index 0000000..db1b081 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll @@ -0,0 +1,607 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call 
<vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + 
iXLen 0, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfadd.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfadd.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfadd_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 
32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfadd_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfadd_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfadd_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfadd_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfadd_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, 
<vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfadd_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfadd_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfadd_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfadd_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> 
@llvm.riscv.vfadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfadd_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfadd_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfadd_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfadd_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll new file mode 100644 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll
new file mode 100644
index 0000000..d7d49b3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16.nxv1bf16(
+  <vscale x 1 x i16>,
+  <vscale x 1 x bfloat>,
+  iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vfclass_v_nxv1i16_nxv1bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv1i16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfclass.v v8, v8
+; CHECK-NEXT:    ret
+  <vscale x 1 x bfloat> %0,
+  iXLen %1) nounwind {
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16.nxv1bf16(
+    <vscale x 1 x i16> poison,
+    <vscale x 1 x bfloat> %0,
+    iXLen %1)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16.nxv1bf16(
+  <vscale x 1 x i16>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vfclass_mask_v_nxv1i16_nxv1bf16(
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT:    vfclass.v v8, v9, v0.t
+; CHECK-NEXT:    ret
+  <vscale x 1 x i16> %0,
+  <vscale x 1 x bfloat> %1,
+  <vscale x 1 x i1> %2,
+  iXLen %3) nounwind {
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16.nxv1bf16(
+    <vscale x 1 x i16> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 0)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16.nxv2bf16(
+  <vscale x 2 x i16>,
+  <vscale x 2 x bfloat>,
+  iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vfclass_v_nxv2i16_nxv2bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv2i16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT:    vfclass.v v8, v8
+; CHECK-NEXT:    ret
+  <vscale x 2 x bfloat> %0,
+  iXLen %1) nounwind {
+entry:
+  %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16.nxv2bf16(
+    <vscale x 2 x i16> poison,
+    <vscale x 2 x bfloat> %0,
+    iXLen %1)
+
+  ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16.nxv2bf16(
+  <vscale x 2 x i16>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vfclass_mask_v_nxv2i16_nxv2bf16(
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT:    vfclass.v v8, v9, v0.t
+; CHECK-NEXT:    ret
+  <vscale x 2 x i16> %0,
+  <vscale x 2 x bfloat> %1,
+  <vscale x 2 x i1> %2,
+  iXLen %3) nounwind {
+entry:
+  %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16.nxv2bf16(
+    <vscale x 2 x i16> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 0)
+
+  ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16.nxv4bf16(
+  <vscale x 4 x i16>,
+  <vscale x 4 x bfloat>,
+  iXLen);
+
+define <vscale x 4 x i16> @intrinsic_vfclass_v_nxv4i16_nxv4bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv4i16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT:    vfclass.v v8, v8
+;
CHECK-NEXT: ret + <vscale x 4 x bfloat> %0, + iXLen %1) nounwind { +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16.nxv4bf16( + <vscale x 4 x i16> poison, + <vscale x 4 x bfloat> %0, + iXLen %1) + + ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16.nxv4bf16( + <vscale x 4 x i16>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen); + +define <vscale x 4 x i16> @intrinsic_vfclass_mask_v_nxv4i16_nxv4bf16( +; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfclass.v v8, v9, v0.t +; CHECK-NEXT: ret + <vscale x 4 x i16> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x i1> %2, + iXLen %3) nounwind { +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16.nxv4bf16( + <vscale x 4 x i16> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x i1> %2, + iXLen %3, iXLen 0) + + ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16.nxv8bf16( + <vscale x 8 x i16>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i16> @intrinsic_vfclass_v_nxv8i16_nxv8bf16( +; CHECK-LABEL: intrinsic_vfclass_v_nxv8i16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: ret + <vscale x 8 x bfloat> %0, + iXLen %1) nounwind { +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16.nxv8bf16( + <vscale x 8 x i16> poison, + <vscale x 8 x bfloat> %0, + iXLen %1) + + ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16.nxv8bf16( + <vscale x 8 x i16>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen); + +define <vscale x 8 x i16> @intrinsic_vfclass_mask_v_nxv8i16_nxv8bf16( +; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfclass.v v8, v10, v0.t +; CHECK-NEXT: ret + <vscale x 8 x i16> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x i1> %2, + iXLen %3) nounwind { +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16.nxv8bf16( + <vscale x 8 x i16> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x i1> %2, + iXLen %3, iXLen 0) + + ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16.nxv16bf16( + <vscale x 16 x i16>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i16> @intrinsic_vfclass_v_nxv16i16_nxv16bf16( +; CHECK-LABEL: intrinsic_vfclass_v_nxv16i16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: ret + <vscale x 16 x bfloat> %0, + iXLen %1) nounwind { +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16.nxv16bf16( + <vscale x 16 x i16> poison, + <vscale x 16 x bfloat> %0, + iXLen %1) + + ret <vscale x 16 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16.nxv16bf16( + <vscale x 16 x i16>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen); + +define <vscale x 16 x i16> @intrinsic_vfclass_mask_v_nxv16i16_nxv16bf16( +; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfclass.v v8, v12, v0.t +; CHECK-NEXT: ret + <vscale x 16 x i16> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x i1> %2, + iXLen %3) nounwind { +entry: + %a = call <vscale x 16 x i16> 
@llvm.riscv.vfclass.mask.nxv16i16.nxv16bf16(
+    <vscale x 16 x i16> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x i1> %2,
+    iXLen %3, iXLen 0)
+
+  ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16.nxv32bf16(
+  <vscale x 32 x i16>,
+  <vscale x 32 x bfloat>,
+  iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vfclass_v_nxv32i16_nxv32bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv32i16_nxv32bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT:    vfclass.v v8, v8
+; CHECK-NEXT:    ret
+  <vscale x 32 x bfloat> %0,
+  iXLen %1) nounwind {
+entry:
+  %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16.nxv32bf16(
+    <vscale x 32 x i16> poison,
+    <vscale x 32 x bfloat> %0,
+    iXLen %1)
+
+  ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16.nxv32bf16(
+  <vscale x 32 x i16>,
+  <vscale x 32 x bfloat>,
+  <vscale x 32 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vfclass_mask_v_nxv32i16_nxv32bf16(
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv32i16_nxv32bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m8, tu, mu
+; CHECK-NEXT:    vfclass.v v8, v16, v0.t
+; CHECK-NEXT:    ret
+  <vscale x 32 x i16> %0,
+  <vscale x 32 x bfloat> %1,
+  <vscale x 32 x i1> %2,
+  iXLen %3) nounwind {
+entry:
+  %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16.nxv32bf16(
+    <vscale x 32 x i16> %0,
+    <vscale x 32 x bfloat> %1,
+    <vscale x 32 x i1> %2,
+    iXLen %3, iXLen 0)
+
+  ret <vscale x 32 x i16> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll
new file mode 100644
index 0000000..13821d7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT:    vfmacc.vv v8, v9, v10
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+    <vscale x 1 x bfloat> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.nxv1bf16(
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero,
a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmacc.vv 
v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmacc.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmacc.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v12, 
v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmacc_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmacc_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmacc_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret 
<vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmacc_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmacc_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmacc_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmacc_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmacc_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, 
bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.bf16(
+    <vscale x 16 x bfloat> %0,
+    bfloat %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.bf16(
+  <vscale x 16 x bfloat>,
+  bfloat,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.bf16(
+    <vscale x 16 x bfloat> %0,
+    bfloat %1,
+    <vscale x 16 x bfloat> %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfmadd.vv v8, v10, v9
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %0,
+    <vscale x 1 x bfloat> %2,
+    iXLen 7, iXLen %3, iXLen 3)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfmadd.vv v8, v10, v9
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x bfloat> %0,
+    iXLen 7, iXLen %3, iXLen 3)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16(
+    <vscale x 1 x bfloat> %2,
+    bfloat %1,
+    <vscale x 1 x bfloat> %0,
+    iXLen 7, iXLen %3, iXLen 3)
+
+  ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll
new file mode 100644
index 0000000..09fc199
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed
's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmadd.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmadd.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = 
call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmadd.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmadd.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmadd.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> 
@llvm.riscv.vfmadd.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmadd.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmadd.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmadd_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x 
bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmadd_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmadd_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmadd_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmadd_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> 
@intrinsic_vfmadd_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmadd_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmadd_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmadd_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> 
@llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %0,
+    <vscale x 1 x bfloat> %2,
+    iXLen 7, iXLen %3, iXLen 3)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfmacc.vv v8, v10, v9
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x bfloat> %0,
+    iXLen 7, iXLen %3, iXLen 3)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+    <vscale x 1 x bfloat> %2,
+    bfloat %1,
+    <vscale x 1 x bfloat> %0,
+    iXLen 7, iXLen %3, iXLen 3)
+
+  ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll
new file mode 100644
index 0000000..a337d30
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.nxv1bf16(
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfmax.vv v8, v8, v9
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.nxv1bf16(
+    <vscale x 1 x bfloat> poison,
+    <vscale x 1 x bfloat> %0,
+    <vscale x 1 x bfloat> %1,
+    iXLen %2)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.nxv1bf16(
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT:    vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.nxv1bf16(
+    <vscale x 1 x bfloat> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 1
x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmax_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmax_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmax_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmax_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmax_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmax_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfmax.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmax_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmax_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfmax.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmax_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + 
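[Reader's note on the two tests that follow: unlike the vfadd and vfmacc intrinsics above, the vfmax intrinsics carry no rounding-mode operand, since max never rounds; that is why no fsrmi/fsrm pair appears anywhere in this file's CHECK lines. The annotated sketch below summarizes what the nxv32 declarations imply; the operand labels are inferred, not part of the commit.]

    ; Masked vfmax operand layout: (passthru, vs2, vs1, mask, AVL, policy).
    ; The trailing iXLen 1 is the policy operand, which produces the
    ; "ta, mu" vsetvli visible in the CHECK lines.
    ; At LMUL=8 the register groups v8 and v16 already hold %0 and %1, so the
    ; third vector argument arrives through a pointer and is reloaded with
    ; vl8re16.v before the masked vfmax.vv executes (see the test below).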
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmax_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmax_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmax.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmax_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmax_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmax.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmax_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfmax.vf 
v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmax_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmax.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmax_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmax_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmax.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmax_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmax_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmax.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmax_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmax_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmax_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmax.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmax_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll new file mode 100644 index 0000000..86ba7c7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll @@ -0,0 +1,258 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmerge_vfm_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: 
intrinsic_vfmerge_vfm_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x i1> %2, + iXLen %3) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmerge_vfm_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x i1> %2, + iXLen %3) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmerge_vfm_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x i1> %2, + iXLen %3) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmerge_vfm_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x i1> %2, + iXLen %3) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmerge_vfm_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x i1> %2, + iXLen %3) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x 
i1>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmerge_vfm_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + <vscale x 32 x i1> %2, + iXLen %3) + + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmerge_vzm_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x fa5, zero +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat zeroinitializer, + <vscale x 1 x i1> %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 2 x bfloat> @intrinsic_vfmerge_vzm_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x fa5, zero +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat zeroinitializer, + <vscale x 2 x i1> %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +define <vscale x 4 x bfloat> @intrinsic_vfmerge_vzm_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x fa5, zero +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat zeroinitializer, + <vscale x 4 x i1> %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +define <vscale x 8 x bfloat> @intrinsic_vfmerge_vzm_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x fa5, zero +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat zeroinitializer, + <vscale x 8 x i1> %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +define <vscale x 16 x bfloat> @intrinsic_vfmerge_vzm_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x fa5, zero +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> 
@llvm.riscv.vfmerge.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat zeroinitializer, + <vscale x 16 x i1> %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 32 x bfloat> @intrinsic_vfmerge_vzm_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.h.x fa5, zero +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat zeroinitializer, + <vscale x 32 x i1> %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll new file mode 100644 index 0000000..37c0cf5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll @@ -0,0 +1,571 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmin_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmin_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmin_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> 
@llvm.riscv.vfmin.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmin_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmin_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmin_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmin_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> 
@intrinsic_vfmin_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfmin.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmin_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmin_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfmin.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmin_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmin_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: 
vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmin_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmin.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmin_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmin_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmin.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmin_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmin_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: 
intrinsic_vfmin_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmin.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmin_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmin_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmin.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmin_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmin_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmin.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> 
@intrinsic_vfmin_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmin_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmin_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmin.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmin_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll new file mode 100644 index 0000000..948d219 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> 
@llvm.riscv.vfmsac.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.nxv4bf16( + 
<vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmsac.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmsac.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x 
bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsac_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsac_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, 
<vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsac_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsac_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsac_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsac_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t +; 
CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsac_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsac_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 
+; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll new file mode 100644 index 0000000..6838f37 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 
x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmsub.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, 
iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmsub.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> 
@intrinsic_vfmsub_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsub_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsub_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsub_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsub_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: 
vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsub_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsub_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsub_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsub_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 
x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsac.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll new file mode 100644 index 0000000..44bce72 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll @@ -0,0 +1,607 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmul_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> 
@intrinsic_vfmul_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmul_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmul_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmul_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmul_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> 
%3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmul_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmul_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfmul.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmul_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmul_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfmul.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmul_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmul_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmul_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmul_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm 
a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmul_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmul_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmul_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmul_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + 
iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmul_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmul_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmul_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmul_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmul_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, 
fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmul_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll new file mode 100644 index 0000000..fbc73119 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+experimental-zvfbfa -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+experimental-zvfbfa -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s + +declare bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat>) + +define bfloat @intrinsic_vfmv.f.s_s_nxv1bf16(<vscale x 1 x bfloat> %0) nounwind { +; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: ret +entry: + %a = call bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat> %0) + ret bfloat %a +} + +declare bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat>) + +define bfloat @intrinsic_vfmv.f.s_s_nxv2bf16(<vscale x 2 x bfloat> %0) nounwind { +; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: ret +entry: + %a = call bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat> %0) + ret bfloat %a +} + +declare bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat>) + +define bfloat @intrinsic_vfmv.f.s_s_nxv4bf16(<vscale x 4 x bfloat> %0) nounwind { +; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: ret +entry: + %a = call bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat> %0) + ret bfloat %a +} + +declare bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat>) + +define bfloat @intrinsic_vfmv.f.s_s_nxv8bf16(<vscale x 8 x bfloat> %0) nounwind { +; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: ret +entry: + %a = call bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat> %0) + ret bfloat %a +} + +declare bfloat 
@llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat>) + +define bfloat @intrinsic_vfmv.f.s_s_nxv16bf16(<vscale x 16 x bfloat> %0) nounwind { +; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: ret +entry: + %a = call bfloat @llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat> %0) + ret bfloat %a +} + +declare bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat>) + +define bfloat @intrinsic_vfmv.f.s_s_nxv32bf16(<vscale x 32 x bfloat> %0) nounwind { +; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: ret +entry: + %a = call bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat> %0) + ret bfloat %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll new file mode 100644 index 0000000..a810809 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s --check-prefixes=CHECK + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat>, bfloat, iXLen) + +define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat>, bfloat, iXLen) + +define <vscale x 2 x bfloat> @intrinsic_vfmv.s.f_f_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat>, bfloat, iXLen) + +define <vscale x 4 x bfloat> @intrinsic_vfmv.s.f_f_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat>, bfloat, iXLen) + +define <vscale x 8 x bfloat> @intrinsic_vfmv.s.f_f_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, 
a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat>, bfloat, iXLen) + +define <vscale x 16 x bfloat> @intrinsic_vfmv.s.f_f_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat>, bfloat, iXLen) + +define <vscale x 32 x bfloat> @intrinsic_vfmv.s.f_f_nxv32bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat 0.0, iXLen %1) + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 2 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat> %0, bfloat 0.0, iXLen %1) + ret <vscale x 2 x bfloat> %a +} + +define <vscale x 4 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat> %0, bfloat 0.0, iXLen %1) + ret <vscale x 4 x bfloat> %a +} + +define <vscale x 8 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat> %0, bfloat 0.0, iXLen %1) + ret <vscale x 8 x bfloat> %a +} + +define <vscale x 16 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat> %0, 
bfloat 0.0, iXLen %1) + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 32 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat> %0, bfloat 0.0, iXLen %1) + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_nxv1bf16_negzero(<vscale x 1 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1bf16_negzero: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat -0.0, iXLen %1) + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll new file mode 100644 index 0000000..f3293dd --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll @@ -0,0 +1,216 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_f_nxv1bf16(bfloat %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16( + <vscale x 1 x bfloat> poison, + bfloat %0, + iXLen %1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmv.v.f_f_nxv2bf16(bfloat %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16( + <vscale x 2 x bfloat> poison, + bfloat %0, + iXLen %1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmv.v.f_f_nxv4bf16(bfloat %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16( + <vscale x 4 x bfloat> poison, + bfloat %0, + iXLen %1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmv.v.f_f_nxv8bf16(bfloat %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, 
a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16( + <vscale x 8 x bfloat> poison, + bfloat %0, + iXLen %1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmv.v.f_f_nxv16bf16(bfloat %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16( + <vscale x 16 x bfloat> poison, + bfloat %0, + iXLen %1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16( + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmv.v.f_f_nxv32bf16(bfloat %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16( + <vscale x 32 x bfloat> poison, + bfloat %0, + iXLen %1) + + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_zero_nxv1bf16(iXLen %0) nounwind { +; CHECK-LABEL: intrinsic_vfmv.v.f_zero_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16( + <vscale x 1 x bfloat> poison, + bfloat 0.0, + iXLen %0) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 2 x bfloat> @intrinsic_vmv.v.i_zero_nxv2bf16(iXLen %0) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16( + <vscale x 2 x bfloat> poison, + bfloat 0.0, + iXLen %0) + + ret <vscale x 2 x bfloat> %a +} + +define <vscale x 4 x bfloat> @intrinsic_vmv.v.i_zero_nxv4bf16(iXLen %0) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16( + <vscale x 4 x bfloat> poison, + bfloat 0.0, + iXLen %0) + + ret <vscale x 4 x bfloat> %a +} + +define <vscale x 8 x bfloat> @intrinsic_vmv.v.i_zero_nxv8bf16(iXLen %0) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16( + <vscale x 8 x bfloat> poison, + bfloat 0.0, + iXLen %0) + + ret <vscale x 8 x bfloat> %a +} + +define <vscale x 16 x bfloat> @intrinsic_vmv.v.i_zero_nxv16bf16(iXLen %0) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16( + <vscale x 16 x bfloat> poison, + bfloat 0.0, + iXLen %0) + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 32 x bfloat> 
@intrinsic_vmv.v.i_zero_nxv32bf16(iXLen %0) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16( + <vscale x 32 x bfloat> poison, + bfloat 0.0, + iXLen %0) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll new file mode 100644 index 0000000..7d587fd --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll @@ -0,0 +1,226 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv1bf16.nxv1f32( + <vscale x 1 x bfloat>, + <vscale x 1 x float>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1bf16_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv1bf16.nxv1f32( + <vscale x 1 x bfloat> poison, + <vscale x 1 x float> %0, + iXLen %1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1bf16.nxv1f32( + <vscale x 1 x bfloat>, + <vscale x 1 x float>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1bf16_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1bf16.nxv1f32( + <vscale x 1 x bfloat> %0, + <vscale x 1 x float> %1, + <vscale x 1 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv2bf16.nxv2f32( + <vscale x 2 x bfloat>, + <vscale x 2 x float>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2bf16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv2bf16.nxv2f32( + <vscale x 2 x bfloat> poison, + <vscale x 2 x float> %0, + iXLen %1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2bf16.nxv2f32( + <vscale x 2 x bfloat>, + <vscale x 2 x float>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: 
intrinsic_vfncvt_mask_rod.f.f.w_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll
new file mode 100644
index 0000000..ee9e3d1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll
new file mode 100644
index 0000000..521f727
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll
new file mode 100644
index 0000000..ab9ebad
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v10, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v12, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v16, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll
new file mode 100644
index 0000000..61c6803
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v10, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v12, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v16, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll
new file mode 100644
index 0000000..4b4091b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll
new file mode 100644
index 0000000..2bb6bf5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x
bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmadd_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmadd_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> 
@intrinsic_vfnmadd_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmadd_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmadd.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmacc.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, 
iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll new file mode 100644 index 0000000..cfbaafa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, 
v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, v10, v12 +; CHECK-NEXT: fsrm 
a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: 
ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsac_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsac_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> 
@llvm.riscv.vfnmsac.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsac_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsac_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x 
bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll new file mode 100644 index 0000000..5ebbb90c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli 
zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, 
ma +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmsub.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; 
CHECK-NEXT: vfnmsub.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsub_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.bf16( + 
<vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsub_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsub_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + 
<vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsub_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsac.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + 
<vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll new file mode 100644 index 0000000..1211415 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll @@ -0,0 +1,282 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrec7.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrec7_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrec7.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrec7.mask.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrec7_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfrec7.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrec7.mask.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrec7.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrec7_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrec7.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrec7.mask.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x 
bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrec7_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfrec7.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrec7.mask.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrec7.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrec7_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrec7.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrec7.mask.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrec7_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfrec7.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrec7.mask.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrec7.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrec7_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrec7.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrec7.mask.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrec7_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfrec7.v v8, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrec7.mask.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x 
i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrec7.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrec7_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrec7.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrec7.mask.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrec7_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfrec7.v v8, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrec7.mask.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrec7.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrec7_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrec7.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrec7.mask.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrec7_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x i1> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfrec7.v v8, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrec7.mask.nxv32bf16( + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll new file mode 100644 index 0000000..4626b86 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 
's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrsqrt7_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + iXLen %1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrsqrt7_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + iXLen %1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrsqrt7_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + iXLen %1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv4bf16( + <vscale x 4 x 
bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrsqrt7_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + iXLen %1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrsqrt7_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + iXLen %1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define 
<vscale x 32 x bfloat> @intrinsic_vfrsqrt7_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + iXLen %1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x i1> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv32bf16( + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll new file mode 100644 index 0000000..54a6d48 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll @@ -0,0 +1,282 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrsub.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrsub_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrsub_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrsub.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x bfloat> 
@intrinsic_vfrsub_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsub.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrsub_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrsub.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrsub_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsub.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrsub_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrsub.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrsub_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsub.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> 
@llvm.riscv.vfrsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrsub_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrsub.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrsub_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsub.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrsub_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrsub.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrsub_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsub.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrsub.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrsub_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, 
v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsub.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll new file mode 100644 index 0000000..2cd698d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll @@ -0,0 +1,571 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnj_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnj_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 
2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnj_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnj_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + 
<vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnj_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnj_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define 
<vscale x 1 x bfloat> @intrinsic_vfsgnj_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnj_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnj_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 
x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnj_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnj_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + 
bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnj_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll new file mode 100644 index 0000000..08340be --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll @@ -0,0 +1,571 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = 
call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale 
x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, 
a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define 
<vscale x 2 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x 
bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll new file mode 100644 index 0000000..e51a42e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll @@ -0,0 +1,571 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 
-mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, 
<vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.nxv16bf16( + <vscale x 16 x 
bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 
x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + 
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.bf16( + <vscale x 32 x 
bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll new file mode 100644 index 0000000..c65719c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll @@ -0,0 +1,288 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfslide1down_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfslide1down_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; 
CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfslide1down_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfslide1down_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, 
<vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfslide1down_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfslide1down_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + 
bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll new file mode 100644 index 0000000..57a4898 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfslide1up_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfslide1up_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = 
call <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfslide1up_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfslide1up_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfslide1up_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, 
iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfslide1up_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll new file mode 100644 index 0000000..aea7521 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll @@ -0,0 +1,559 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.nxv1bf16( + <vscale x 1 x 
bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli 
zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsub.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale 
x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsub.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsub_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsub_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsub_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsub_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat 
%2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsub_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsub_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsub_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsub_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + 
<vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsub_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsub_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsub_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsub_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsub_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen 
%2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsub_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll new file mode 100644 index 0000000..62feac8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll @@ -0,0 +1,519 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwadd.vv v10, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; 
CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwadd.vv v10, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v9 +; CHECK-NEXT: vmv1r.v v11, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vv v8, v11, v10 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v10, v11, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; 
CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v10 +; CHECK-NEXT: vmv2r.v v14, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vv v8, v14, v12 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v12, v14, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v12 +; CHECK-NEXT: vmv4r.v v20, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vv v8, v20, v16 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v16, v20, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x 
bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vf v8, v10, fa0 
+; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vf v8, v12, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vf v8, v16, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + 
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll new file mode 100644 index 0000000..c5417e8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll @@ -0,0 +1,773 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: 
intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma 
+; CHECK-NEXT: vfwadd.wv v8, v8, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16bf16( + <vscale x 16 x float> poison, + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl4re16.v v24, (a0) +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, a1, e16alt, m4, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v16, v24, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x 
float> @llvm.riscv.vfwadd.w.nxv1f32.bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> 
@intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.bf16( + <vscale x 8 x float> poison, + <vscale x 8 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.bf16( + <vscale x 16 x float> poison, + <vscale x 16 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t +; 
CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v8, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v8, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 8 x float> %a +} + +define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v8, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret 
+entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 16 x float> %a +} + +define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %0, + bfloat %1, + <vscale x 1 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %0, + bfloat %1, + <vscale x 2 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %0, + bfloat %1, + <vscale x 4 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %0, + bfloat %1, + <vscale x 8 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 8 x float> %a +} + +define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %0, + bfloat %1, + <vscale x 16 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + 
ret <vscale x 16 x float> %a +} + +define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwadd.wv v10, v9, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %1, + <vscale x 1 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwadd.wv v10, v9, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %1, + <vscale x 2 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v12, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.wv v8, v10, v12 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %1, + <vscale x 4 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v16, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.wv v8, v12, v16 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x float> %1, + <vscale x 8 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll new file mode 100644 index 0000000..b7df45b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv1bf16.nxv1i8( + <vscale x 1 x bfloat>, + <vscale x 1 x i8>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv1bf16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind { 
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1bf16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma +; CHECK-NEXT: vfwcvt.f.x.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv1bf16.nxv1i8( + <vscale x 1 x bfloat> poison, + <vscale x 1 x i8> %0, + iXLen %1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1bf16.nxv1i8( + <vscale x 1 x bfloat>, + <vscale x 1 x i8>, + <vscale x 1 x i1>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv1bf16_nxv1i8(<vscale x 1 x bfloat> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1bf16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu +; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1bf16.nxv1i8( + <vscale x 1 x bfloat> %0, + <vscale x 1 x i8> %1, + <vscale x 1 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv2bf16.nxv2i8( + <vscale x 2 x bfloat>, + <vscale x 2 x i8>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv2bf16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2bf16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.x.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv2bf16.nxv2i8( + <vscale x 2 x bfloat> poison, + <vscale x 2 x i8> %0, + iXLen %1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2bf16.nxv2i8( + <vscale x 2 x bfloat>, + <vscale x 2 x i8>, + <vscale x 2 x i1>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv2bf16_nxv2i8(<vscale x 2 x bfloat> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2bf16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu +; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2bf16.nxv2i8( + <vscale x 2 x bfloat> %0, + <vscale x 2 x i8> %1, + <vscale x 2 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv4bf16.nxv4i8( + <vscale x 4 x bfloat>, + <vscale x 4 x i8>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv4bf16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4bf16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.x.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv4bf16.nxv4i8( + <vscale x 4 x bfloat> poison, + <vscale x 4 x i8> %0, + iXLen %1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4bf16.nxv4i8( + <vscale x 4 x bfloat>, + <vscale x 4 x i8>, + <vscale x 4 x i1>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv4bf16_nxv4i8(<vscale x 4 x bfloat> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) 
nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4bf16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu +; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4bf16.nxv4i8( + <vscale x 4 x bfloat> %0, + <vscale x 4 x i8> %1, + <vscale x 4 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv8bf16.nxv8i8( + <vscale x 8 x bfloat>, + <vscale x 8 x i8>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv8bf16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8bf16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vfwcvt.f.x.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv8bf16.nxv8i8( + <vscale x 8 x bfloat> poison, + <vscale x 8 x i8> %0, + iXLen %1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8bf16.nxv8i8( + <vscale x 8 x bfloat>, + <vscale x 8 x i8>, + <vscale x 8 x i1>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8bf16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu +; CHECK-NEXT: vfwcvt.f.x.v v8, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8bf16.nxv8i8( + <vscale x 8 x bfloat> %0, + <vscale x 8 x i8> %1, + <vscale x 8 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv16bf16.nxv16i8( + <vscale x 16 x bfloat>, + <vscale x 16 x i8>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv16bf16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16bf16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: vfwcvt.f.x.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv16bf16.nxv16i8( + <vscale x 16 x bfloat> poison, + <vscale x 16 x i8> %0, + iXLen %1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16bf16.nxv16i8( + <vscale x 16 x bfloat>, + <vscale x 16 x i8>, + <vscale x 16 x i1>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16bf16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu +; CHECK-NEXT: vfwcvt.f.x.v v8, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16bf16.nxv16i8( + <vscale x 16 x bfloat> %0, + <vscale x 16 x i8> %1, + <vscale x 16 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv32bf16.nxv32i8( + <vscale x 32 x bfloat>, + <vscale x 32 x i8>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv32bf16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind { +; 
CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32bf16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: vfwcvt.f.x.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv32bf16.nxv32i8( + <vscale x 32 x bfloat> poison, + <vscale x 32 x i8> %0, + iXLen %1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32bf16.nxv32i8( + <vscale x 32 x bfloat>, + <vscale x 32 x i8>, + <vscale x 32 x i1>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv32bf16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu +; CHECK-NEXT: vfwcvt.f.x.v v8, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32bf16.nxv32i8( + <vscale x 32 x bfloat> %0, + <vscale x 32 x i8> %1, + <vscale x 32 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll new file mode 100644 index 0000000..c370261 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv1bf16.nxv1i8( + <vscale x 1 x bfloat>, + <vscale x 1 x i8>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv1bf16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1bf16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv1bf16.nxv1i8( + <vscale x 1 x bfloat> poison, + <vscale x 1 x i8> %0, + iXLen %1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1bf16.nxv1i8( + <vscale x 1 x bfloat>, + <vscale x 1 x i8>, + <vscale x 1 x i1>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv1bf16_nxv1i8(<vscale x 1 x bfloat> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1bf16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1bf16.nxv1i8( + <vscale x 1 x bfloat> %0, + <vscale x 1 x i8> %1, + <vscale x 1 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv2bf16.nxv2i8( + <vscale x 2 x bfloat>, + <vscale x 2 x i8>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv2bf16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2bf16_nxv2i8: +; CHECK: # %bb.0: # %entry 
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv2bf16.nxv2i8( + <vscale x 2 x bfloat> poison, + <vscale x 2 x i8> %0, + iXLen %1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2bf16.nxv2i8( + <vscale x 2 x bfloat>, + <vscale x 2 x i8>, + <vscale x 2 x i1>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv2bf16_nxv2i8(<vscale x 2 x bfloat> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2bf16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2bf16.nxv2i8( + <vscale x 2 x bfloat> %0, + <vscale x 2 x i8> %1, + <vscale x 2 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv4bf16.nxv4i8( + <vscale x 4 x bfloat>, + <vscale x 4 x i8>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv4bf16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4bf16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv4bf16.nxv4i8( + <vscale x 4 x bfloat> poison, + <vscale x 4 x i8> %0, + iXLen %1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4bf16.nxv4i8( + <vscale x 4 x bfloat>, + <vscale x 4 x i8>, + <vscale x 4 x i1>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv4bf16_nxv4i8(<vscale x 4 x bfloat> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4bf16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4bf16.nxv4i8( + <vscale x 4 x bfloat> %0, + <vscale x 4 x i8> %1, + <vscale x 4 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv8bf16.nxv8i8( + <vscale x 8 x bfloat>, + <vscale x 8 x i8>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv8bf16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8bf16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv8bf16.nxv8i8( + <vscale x 8 x bfloat> poison, + <vscale x 8 x i8> %0, + iXLen %1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8bf16.nxv8i8( + <vscale x 8 x bfloat>, + <vscale x 8 x i8>, + <vscale x 8 x i1>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: 
intrinsic_vfwcvt_mask_f.xu.v_nxv8bf16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu +; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8bf16.nxv8i8( + <vscale x 8 x bfloat> %0, + <vscale x 8 x i8> %1, + <vscale x 8 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv16bf16.nxv16i8( + <vscale x 16 x bfloat>, + <vscale x 16 x i8>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv16bf16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16bf16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv16bf16.nxv16i8( + <vscale x 16 x bfloat> poison, + <vscale x 16 x i8> %0, + iXLen %1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16bf16.nxv16i8( + <vscale x 16 x bfloat>, + <vscale x 16 x i8>, + <vscale x 16 x i1>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16bf16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu +; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16bf16.nxv16i8( + <vscale x 16 x bfloat> %0, + <vscale x 16 x i8> %1, + <vscale x 16 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv32bf16.nxv32i8( + <vscale x 32 x bfloat>, + <vscale x 32 x i8>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv32bf16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32bf16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv32bf16.nxv32i8( + <vscale x 32 x bfloat> poison, + <vscale x 32 x i8> %0, + iXLen %1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32bf16.nxv32i8( + <vscale x 32 x bfloat>, + <vscale x 32 x i8>, + <vscale x 32 x i1>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv32bf16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu +; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32bf16.nxv32i8( + <vscale x 32 x bfloat> %0, + <vscale x 32 x i8> %1, + <vscale x 32 x i1> %2, + iXLen %3, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll new file mode 100644 index 0000000..a3f6678 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll @@ -0,0 +1,506 @@ +; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwmsac_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfwmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwmsac_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfwmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vv v8, v9, 
v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmsac_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfwmsac.vv v8, v10, v11 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfwmsac.vv v8, v10, v11, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwmsac_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfwmsac.vv v8, v12, v14 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfwmsac.vv v8, v12, v14, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call 
<vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmsac_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfwmsac.vv v8, v16, v20 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfwmsac.vv v8, v16, v20, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.bf16( + <vscale x 1 x float>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwmsac_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.bf16( + <vscale x 1 x float> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.bf16( + <vscale x 1 x float>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + bfloat %1, + <vscale x 1 x bfloat> 
%2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.bf16( + <vscale x 2 x float>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwmsac_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.bf16( + <vscale x 2 x float> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.bf16( + <vscale x 2 x float>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.bf16( + <vscale x 4 x float>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmsac_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.bf16( + <vscale x 4 x float> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.bf16( + <vscale x 4 x float>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.bf16( + <vscale x 8 x float>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> 
@intrinsic_vfwmsac_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.bf16( + <vscale x 8 x float> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.bf16( + <vscale x 8 x float>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.bf16( + <vscale x 8 x float> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.bf16( + <vscale x 16 x float>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmsac_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.bf16( + <vscale x 16 x float> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.bf16( + <vscale x 16 x float>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.bf16( + <vscale x 16 x float> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 16 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll new file mode 100644 index 0000000..577b93a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll @@ -0,0 +1,519 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: 
sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwmul_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwmul.vv v10, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwmul_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwmul.vv v10, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + 
<vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmul_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v9 +; CHECK-NEXT: vmv1r.v v11, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwmul.vv v8, v11, v10 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwmul.vv v8, v10, v11, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwmul_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v10 +; CHECK-NEXT: vmv2r.v v14, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwmul.vv v8, v14, v12 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwmul.vv v8, v12, v14, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float> %0, 
+ <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmul_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v12 +; CHECK-NEXT: vmv4r.v v20, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwmul.vv v8, v20, v16 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwmul.vv v8, v16, v20, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwmul_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwmul.vf v9, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x 
i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwmul_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwmul.vf v9, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwmul.vf v8, v10, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x float> 
@intrinsic_vfwmul_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwmul.vf v8, v12, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwmul.vf v8, v16, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll new file mode 100644 index 0000000..1e05e4c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll @@ -0,0 +1,506 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: 
-verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfwnmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwnmacc_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfwnmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2bf16( + <vscale x 2 x 
float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwnmacc_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfwnmacc.vv v8, v10, v11 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vv v8, v10, v11, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwnmacc_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfwnmacc.vv v8, v12, v14 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfwnmacc.vv v8, v12, v14, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 
x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwnmacc_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfwnmacc.vv v8, v16, v20 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vv v8, v16, v20, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.bf16( + <vscale x 1 x float>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwnmacc_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.bf16( + <vscale x 1 x float> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.bf16( + <vscale x 1 x float>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 
x float> @llvm.riscv.vfwnmacc.nxv2f32.bf16( + <vscale x 2 x float>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwnmacc_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.bf16( + <vscale x 2 x float> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.bf16( + <vscale x 2 x float>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.bf16( + <vscale x 4 x float>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwnmacc_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.bf16( + <vscale x 4 x float> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.bf16( + <vscale x 4 x float>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.bf16( + <vscale x 8 x float>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwnmacc_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; 
CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.bf16( + <vscale x 8 x float> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.bf16( + <vscale x 8 x float>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.bf16( + <vscale x 8 x float> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.bf16( + <vscale x 16 x float>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwnmacc_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.bf16( + <vscale x 16 x float> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.bf16( + <vscale x 16 x float>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.bf16( + <vscale x 16 x float> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 16 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll new file mode 100644 index 0000000..223ad4f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll @@ -0,0 +1,506 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: 
-verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwnmsac_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfwnmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwnmsac_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 2 x float> %a 
+} + +declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwnmsac_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfwnmsac.vv v8, v10, v11 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vv v8, v10, v11, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwnmsac_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfwnmsac.vv v8, v12, v14 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vv v8, v12, v14, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> 
@llvm.riscv.vfwnmsac.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwnmsac_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfwnmsac.vv v8, v16, v20 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vv v8, v16, v20, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.bf16( + <vscale x 1 x float>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwnmsac_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.bf16( + <vscale x 1 x float> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.bf16( + <vscale x 1 x float>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.bf16( + <vscale x 2 x float>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + 
+define <vscale x 2 x float> @intrinsic_vfwnmsac_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.bf16( + <vscale x 2 x float> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.bf16( + <vscale x 2 x float>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.bf16( + <vscale x 4 x float>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwnmsac_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.bf16( + <vscale x 4 x float> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.bf16( + <vscale x 4 x float>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.bf16( + <vscale x 8 x float>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwnmsac_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli 
zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.bf16( + <vscale x 8 x float> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.bf16( + <vscale x 8 x float>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.bf16( + <vscale x 8 x float> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.bf16( + <vscale x 16 x float>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwnmsac_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.bf16( + <vscale x 16 x float> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.bf16( + <vscale x 16 x float>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.bf16( + <vscale x 16 x float> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 16 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll new file mode 100644 index 0000000..d993e4e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll @@ -0,0 +1,519 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float>, + 
<vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwsub.vv v10, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwsub_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwsub.vv v10, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale 
x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwsub_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v9 +; CHECK-NEXT: vmv1r.v v11, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.vv v8, v11, v10 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwsub.vv v8, v10, v11, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwsub_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v10 +; CHECK-NEXT: vmv2r.v v14, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.vv v8, v14, v12 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwsub.vv v8, v12, v14, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float>, 
+ <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v12 +; CHECK-NEXT: vmv4r.v v20, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.vv v8, v20, v16 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwsub.vv v8, v16, v20, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwsub.vf v9, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x 
float> @intrinsic_vfwsub_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwsub.vf v9, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.vf v8, v10, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 
+; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.vf v8, v12, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.vf v8, v16, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll new file mode 100644 index 0000000..b22899a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll @@ -0,0 +1,773 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16( + <vscale x 
1 x float>, + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwsub.wv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwsub.wv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x float> 
@intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfwsub.wv v8, v8, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfwsub.wv v8, v8, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: 
intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfwsub.wv v8, v8, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16bf16( + <vscale x 16 x float> poison, + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl4re16.v v24, (a0) +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, a1, e16alt, m4, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v16, v24, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a 
= call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.bf16( + <vscale x 8 x float> poison, + <vscale x 8 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> 
@intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.bf16( + <vscale x 16 x float> poison, + <vscale x 16 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %0, + 
<vscale x 2 x bfloat> %1, + <vscale x 2 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 8 x float> %a +} + +define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 16 x float> %a +} + +define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %0, + bfloat %1, + <vscale x 1 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %0, + bfloat %1, + <vscale x 2 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 2 x 
float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %0, + bfloat %1, + <vscale x 4 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %0, + bfloat %1, + <vscale x 8 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 8 x float> %a +} + +define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %0, + bfloat %1, + <vscale x 16 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 16 x float> %a +} + +define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwsub.wv v10, v9, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %1, + <vscale x 1 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwsub.wv v10, v9, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %1, + <vscale x 2 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: 
intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v12, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.wv v8, v10, v12 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %1, + <vscale x 4 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v16, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.wv v8, v12, v16 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x float> %1, + <vscale x 8 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll new file mode 100644 index 0000000..9bd859b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll @@ -0,0 +1,496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfeq_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> 
@intrinsic_vmfeq_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfeq_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfeq_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call 
<vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfeq_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale 
x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale 
x 8 x i1> @intrinsic_vmfeq_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfeq_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16.bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16.bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll new file mode 100644 index 0000000..73946dc --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll @@ -0,0 +1,496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfge_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: 
intrinsic_vmfge_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfge_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfge_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret 
<vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfge_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfge_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfge_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 
x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale 
x 4 x i1> @llvm.riscv.vmfge.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfge_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16.bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> 
@intrinsic_vmfge_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16.bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll new file mode 100644 index 0000000..fac324c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll @@ -0,0 +1,496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfgt_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfgt_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16( + <vscale x 2 
x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfgt_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfgt_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: 
intrinsic_vmfgt_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfgt_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16.bf16( + 
<vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16.bf16( + 
<vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfgt_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16.bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16.bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll new file mode 100644 index 0000000..8356b7b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll @@ -0,0 +1,496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfle_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define 
<vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfle_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfle_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu 
+; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfle_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfle_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfle_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, 
+ iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + 
ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfle_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16.bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfle_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16.bf16( + <vscale x 16 x i1> %0, + <vscale 
x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll new file mode 100644 index 0000000..2e1bcc5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll @@ -0,0 +1,496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmflt_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmflt_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t +; CHECK-NEXT: 
vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmflt_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmflt_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + 
<vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmflt_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmflt_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call 
<vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call 
<vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmflt_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16.bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmflt_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16.bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll new file mode 100644 index 0000000..283ffc5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll @@ -0,0 +1,496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfne_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16( + <vscale x 1 
x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfne_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfne_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> 
@llvm.riscv.vmfne.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfne_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfne_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfne_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry 
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: 
vmv1r.v v0, v9 +; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfne_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16.bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfne_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16.bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + diff --git a/llvm/test/CodeGen/X86/combine-umax.ll b/llvm/test/CodeGen/X86/combine-umax.ll index 25f8ec8..482b4fc 100644 --- a/llvm/test/CodeGen/X86/combine-umax.ll +++ b/llvm/test/CodeGen/X86/combine-umax.ll @@ -60,7 +60,7 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) { ; SSE2-LABEL: test_v16i8_demandedbits: ; 
SSE2: # %bb.0: -; SSE2-NEXT: pmaxub %xmm1, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 ; SSE2-NEXT: pand %xmm1, %xmm3 diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll index 76dbcb5..e2757d0 100644 --- a/llvm/test/CodeGen/X86/combine-umin.ll +++ b/llvm/test/CodeGen/X86/combine-umin.ll @@ -77,7 +77,7 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) { ; SSE2-LABEL: test_v16i8_demandedbits: ; SSE2: # %bb.0: -; SSE2-NEXT: pminub %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 ; SSE2-NEXT: pand %xmm1, %xmm3 diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll index ac932d5..1a63515 100644 --- a/llvm/test/CodeGen/X86/vector-compress.ll +++ b/llvm/test/CodeGen/X86/vector-compress.ll @@ -1090,7 +1090,6 @@ define <16 x i8> @test_compress_v16i8(<16 x i8> %vec, <16 x i1> %mask, <16 x i8> ; AVX2-NEXT: pushq %r12 ; AVX2-NEXT: pushq %rbx ; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vmovaps %xmm2, -{{[0-9]+}}(%rsp) @@ -1335,7 +1334,6 @@ define <32 x i8> @test_compress_v32i8(<32 x i8> %vec, <32 x i1> %mask, <32 x i8> ; AVX2-NEXT: andq $-32, %rsp ; AVX2-NEXT: subq $64, %rsp ; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm3, %ymm3 ; AVX2-NEXT: vmovaps %ymm2, (%rsp) @@ -4733,7 +4731,6 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vpextrb $0, %xmm0, -{{[0-9]+}}(%rsp) @@ -4751,72 +4748,7 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) nounwind { ; AVX2-NEXT: vpextrb $3, %xmm1, %ecx ; AVX2-NEXT: andl $1, %ecx ; AVX2-NEXT: addq %rax, %rcx -; AVX2-NEXT: vpextrb $4, %xmm0, -24(%rsp,%rcx) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: addq %rcx, %rax -; AVX2-NEXT: vpextrb $5, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: addq %rax, %rcx -; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax -; AVX2-NEXT: andl $15, %eax -; AVX2-NEXT: vpextrb $5, %xmm0, -24(%rsp,%rax) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: addq %rcx, %rax -; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx -; AVX2-NEXT: andl $15, %ecx -; AVX2-NEXT: vpextrb $6, %xmm0, -24(%rsp,%rcx) -; AVX2-NEXT: vpextrb $7, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: addq %rax, %rcx -; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax -; AVX2-NEXT: andl $15, %eax -; AVX2-NEXT: vpextrb $7, %xmm0, -24(%rsp,%rax) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: addq %rcx, %rax -; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx -; AVX2-NEXT: andl $15, %ecx -; AVX2-NEXT: vpextrb $8, %xmm0, -24(%rsp,%rcx) -; AVX2-NEXT: vpextrb $9, %xmm1, %ecx -; 
AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: addq %rax, %rcx -; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax -; AVX2-NEXT: andl $15, %eax -; AVX2-NEXT: vpextrb $9, %xmm0, -24(%rsp,%rax) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: addq %rcx, %rax -; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx -; AVX2-NEXT: andl $15, %ecx -; AVX2-NEXT: vpextrb $10, %xmm0, -24(%rsp,%rcx) -; AVX2-NEXT: vpextrb $11, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: addq %rax, %rcx -; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax -; AVX2-NEXT: andl $15, %eax -; AVX2-NEXT: vpextrb $11, %xmm0, -24(%rsp,%rax) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: addq %rcx, %rax -; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx -; AVX2-NEXT: andl $15, %ecx -; AVX2-NEXT: vpextrb $12, %xmm0, -24(%rsp,%rcx) -; AVX2-NEXT: vpextrb $13, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: addq %rax, %rcx -; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax -; AVX2-NEXT: andl $15, %eax -; AVX2-NEXT: vpextrb $13, %xmm0, -24(%rsp,%rax) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: addl %ecx, %eax -; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx -; AVX2-NEXT: andl $15, %ecx -; AVX2-NEXT: vpextrb $14, %xmm0, -24(%rsp,%rcx) -; AVX2-NEXT: andl $15, %eax -; AVX2-NEXT: vpextrb $15, %xmm0, -24(%rsp,%rax) +; AVX2-NEXT: vpextrb $15, %xmm0, -24(%rsp,%rcx) ; AVX2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 ; AVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll index 1c5be03..ac330a7 100644 --- a/llvm/test/CodeGen/X86/vselect-avx.ll +++ b/llvm/test/CodeGen/X86/vselect-avx.ll @@ -151,23 +151,19 @@ define <32 x i8> @PR22706(<32 x i1> %x) { ; AVX1: ## %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1 -; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] -; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] +; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: PR22706: ; AVX2: ## %bb.0: ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 diff --git a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll index 6732efc..dbd572d 100644 --- a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll +++ b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll @@ -111,7 +111,6 @@ define void @test_sub_cmp(ptr align 8 %start, ptr %end) { ; N32-NEXT: [[CMP_ENTRY:%.*]] = icmp eq ptr [[START]], [[END]] ; N32-NEXT: br 
i1 [[CMP_ENTRY]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]] ; N32: [[LOOP_HEADER_PREHEADER]]: -; N32-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[PTR_DIFF]], i64 1) ; N32-NEXT: br label %[[LOOP_HEADER:.*]] ; N32: [[LOOP_HEADER]]: ; N32-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ] @@ -119,7 +118,7 @@ define void @test_sub_cmp(ptr align 8 %start, ptr %end) { ; N32-NEXT: br i1 [[C_1]], label %[[EXIT_EARLY:.*]], label %[[LOOP_LATCH]] ; N32: [[LOOP_LATCH]]: ; N32-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 -; N32-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UMAX]] +; N32-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[PTR_DIFF]] ; N32-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT_LOOPEXIT:.*]] ; N32: [[EXIT_EARLY]]: ; N32-NEXT: br label %[[EXIT]] @@ -162,13 +161,17 @@ define void @test_ptr_diff_with_assume(ptr align 8 %start, ptr align 8 %end, ptr ; CHECK-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]] ; CHECK-NEXT: [[DIFF_CMP:%.*]] = icmp ult i64 [[PTR_DIFF]], 2 ; CHECK-NEXT: call void @llvm.assume(i1 [[DIFF_CMP]]) +; CHECK-NEXT: [[COMPUTED_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[PTR_DIFF]] ; CHECK-NEXT: [[ENTRY_CMP:%.*]] = icmp eq ptr [[START]], [[END]] ; CHECK-NEXT: br i1 [[ENTRY_CMP]], label %[[EXIT:.*]], label %[[LOOP_BODY_PREHEADER:.*]] ; CHECK: [[LOOP_BODY_PREHEADER]]: ; CHECK-NEXT: br label %[[LOOP_BODY:.*]] ; CHECK: [[LOOP_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ], [ [[START]], %[[LOOP_BODY_PREHEADER]] ] ; CHECK-NEXT: [[TMP0:%.*]] = call i1 @cond() -; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]] +; CHECK-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 1 +; CHECK-NEXT: [[LOOP_CMP:%.*]] = icmp eq ptr [[IV_NEXT]], [[COMPUTED_END]] +; CHECK-NEXT: br i1 [[LOOP_CMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: @@ -182,13 +185,17 @@ define void @test_ptr_diff_with_assume(ptr align 8 %start, ptr align 8 %end, ptr ; N32-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]] ; N32-NEXT: [[DIFF_CMP:%.*]] = icmp ult i64 [[PTR_DIFF]], 2 ; N32-NEXT: call void @llvm.assume(i1 [[DIFF_CMP]]) +; N32-NEXT: [[COMPUTED_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[PTR_DIFF]] ; N32-NEXT: [[ENTRY_CMP:%.*]] = icmp eq ptr [[START]], [[END]] ; N32-NEXT: br i1 [[ENTRY_CMP]], label %[[EXIT:.*]], label %[[LOOP_BODY_PREHEADER:.*]] ; N32: [[LOOP_BODY_PREHEADER]]: ; N32-NEXT: br label %[[LOOP_BODY:.*]] ; N32: [[LOOP_BODY]]: +; N32-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ], [ [[START]], %[[LOOP_BODY_PREHEADER]] ] ; N32-NEXT: [[TMP0:%.*]] = call i1 @cond() -; N32-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]] +; N32-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 1 +; N32-NEXT: [[LOOP_CMP:%.*]] = icmp eq ptr [[IV_NEXT]], [[COMPUTED_END]] +; N32-NEXT: br i1 [[LOOP_CMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]] ; N32: [[EXIT_LOOPEXIT]]: ; N32-NEXT: br label %[[EXIT]] ; N32: [[EXIT]]: diff --git a/llvm/test/Transforms/InstCombine/scmp.ll b/llvm/test/Transforms/InstCombine/scmp.ll index 2bf22ae..c0be5b9 100644 --- a/llvm/test/Transforms/InstCombine/scmp.ll +++ b/llvm/test/Transforms/InstCombine/scmp.ll @@ -423,6 +423,86 @@ define i8 @scmp_from_select_eq_and_gt_commuted3(i32 %x, i32 %y) { ret i8 %r } +; Commutative tests for (x != y) ? (x > y ? 
1 : -1) : 0 +define i8 @scmp_from_select_ne_and_gt_commuted1(i32 %x, i32 %y) { +; CHECK-LABEL: define i8 @scmp_from_select_ne_and_gt_commuted1( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[Y]], i32 [[X]]) +; CHECK-NEXT: ret i8 [[R]] +; + %ne = icmp ne i32 %x, %y + %gt = icmp slt i32 %x, %y + %sel1 = select i1 %gt, i8 1, i8 -1 + %r = select i1 %ne, i8 %sel1, i8 0 + ret i8 %r +} + +define i8 @scmp_from_select_ne_and_gt_commuted2(i32 %x, i32 %y) { +; CHECK-LABEL: define i8 @scmp_from_select_ne_and_gt_commuted2( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[Y]], i32 [[X]]) +; CHECK-NEXT: ret i8 [[R]] +; + %ne = icmp ne i32 %x, %y + %gt = icmp sgt i32 %x, %y + %sel1 = select i1 %gt, i8 -1, i8 1 + %r = select i1 %ne, i8 %sel1, i8 0 + ret i8 %r +} + +define i8 @scmp_from_select_ne_and_gt_commuted3(i32 %x, i32 %y) { +; CHECK-LABEL: define i8 @scmp_from_select_ne_and_gt_commuted3( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]]) +; CHECK-NEXT: ret i8 [[R]] +; + %ne = icmp ne i32 %x, %y + %gt = icmp sgt i32 %x, %y + %sel1 = select i1 %gt, i8 1, i8 -1 + %r = select i1 %ne, i8 %sel1, i8 0 + ret i8 %r +} + +; Commutative tests for x != C ? (x > C - 1 ? 1 : -1) : 0 +define i8 @scmp_from_select_ne_const_and_gt_commuted1(i32 %x) { +; CHECK-LABEL: define i8 @scmp_from_select_ne_const_and_gt_commuted1( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 5) +; CHECK-NEXT: ret i8 [[R]] +; + %ne = icmp ne i32 %x, 5 + %gt = icmp sgt i32 %x, 4 + %sel1 = select i1 %gt, i8 1, i8 -1 + %r = select i1 %ne, i8 %sel1, i8 0 + ret i8 %r +} + +define i8 @scmp_from_select_ne_const_and_gt_commuted2(i32 %x) { +; CHECK-LABEL: define i8 @scmp_from_select_ne_const_and_gt_commuted2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 5) +; CHECK-NEXT: ret i8 [[R]] +; + %ne = icmp ne i32 %x, 5 + %gt = icmp sgt i32 %x, 4 + %sel1 = select i1 %gt, i8 1, i8 -1 + %r = select i1 %ne, i8 %sel1, i8 0 + ret i8 %r +} + +define i8 @scmp_from_select_ne_const_and_gt_commuted3(i32 %x) { +; CHECK-LABEL: define i8 @scmp_from_select_ne_const_and_gt_commuted3( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 5) +; CHECK-NEXT: ret i8 [[R]] +; + %ne = icmp ne i32 %x, 5 + %gt = icmp sgt i32 %x, 4 + %sel1 = select i1 %gt, i8 1, i8 -1 + %r = select i1 %ne, i8 %sel1, i8 0 + ret i8 %r +} + define <3 x i2> @scmp_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: define <3 x i2> @scmp_unary_shuffle_ops( ; CHECK-SAME: <3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) { @@ -436,6 +516,187 @@ define <3 x i2> @scmp_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) { ret <3 x i2> %r } +define i32 @scmp_sgt_slt(i32 %a) { +; CHECK-LABEL: define i32 @scmp_sgt_slt( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr i32 [[A]], 31 +; CHECK-NEXT: [[CMP_INV:%.*]] = icmp slt i32 [[A]], 1 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[CMP_INV]], i32 [[A_LOBIT]], i32 1 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %cmp = icmp sgt i32 %a, 0 + %cmp1 = icmp slt i32 %a, 0 + %. = select i1 %cmp1, i32 -1, i32 0 + %retval.0 = select i1 %cmp, i32 1, i32 %. 
+ ret i32 %retval.0 +} + +define i32 @scmp_zero_slt(i32 %a) { +; CHECK-LABEL: define i32 @scmp_zero_slt( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0) +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %cmp = icmp eq i32 %a, 0 + %cmp1.inv = icmp slt i32 %a, 1 + %. = select i1 %cmp1.inv, i32 -1, i32 1 + %retval.0 = select i1 %cmp, i32 0, i32 %. + ret i32 %retval.0 +} + +define i32 @scmp_zero_sgt(i32 %a) { +; CHECK-LABEL: define i32 @scmp_zero_sgt( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0) +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %cmp = icmp eq i32 %a, 0 + %cmp1.inv = icmp sgt i32 %a, -1 + %. = select i1 %cmp1.inv, i32 1, i32 -1 + %retval.0 = select i1 %cmp, i32 0, i32 %. + ret i32 %retval.0 +} + + +define i32 @scmp_zero_sgt_1(i32 %a) { +; CHECK-LABEL: define i32 @scmp_zero_sgt_1( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[COND2:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0) +; CHECK-NEXT: ret i32 [[COND2]] +; + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp sgt i32 %a, -1 + %cond = select i1 %cmp1, i32 1, i32 -1 + %cond2 = select i1 %cmp, i32 0, i32 %cond + ret i32 %cond2 +} + +define i32 @scmp_zero_slt_1(i32 %a) { +; CHECK-LABEL: define i32 @scmp_zero_slt_1( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[COND2:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0) +; CHECK-NEXT: ret i32 [[COND2]] +; + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp slt i32 %a, 1 + %cond = select i1 %cmp1, i32 -1, i32 1 + %cond2 = select i1 %cmp, i32 0, i32 %cond + ret i32 %cond2 +} + +define i32 @scmp_zero_slt_neg(i32 %a) { +; CHECK-LABEL: define i32 @scmp_zero_slt_neg( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[A]], -1 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP1]], i32 -1, i32 1 +; CHECK-NEXT: [[COND2:%.*]] = select i1 [[CMP]], i32 0, i32 [[COND]] +; CHECK-NEXT: ret i32 [[COND2]] +; + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp slt i32 %a, -1 + %cond = select i1 %cmp1, i32 -1, i32 1 + %cond2 = select i1 %cmp, i32 0, i32 %cond + ret i32 %cond2 +} + +define i32 @scmp_zero_sgt_neg(i32 %a) { +; CHECK-LABEL: define i32 @scmp_zero_sgt_neg( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 1 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP1]], i32 1, i32 -1 +; CHECK-NEXT: [[COND2:%.*]] = select i1 [[CMP]], i32 0, i32 [[COND]] +; CHECK-NEXT: ret i32 [[COND2]] +; + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp sgt i32 %a, 1 + %cond = select i1 %cmp1, i32 1, i32 -1 + %cond2 = select i1 %cmp, i32 0, i32 %cond + ret i32 %cond2 +} + +define i32 @ucmp_ugt_ult_neg(i32 %a) { +; CHECK-LABEL: define i32 @ucmp_ugt_ult_neg( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp ne i32 [[A]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = zext i1 [[CMP_NOT]] to i32 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %cmp = icmp ugt i32 %a, 0 + %cmp1 = icmp ult i32 %a, 0 + %. = select i1 %cmp1, i32 -1, i32 0 + %retval.0 = select i1 %cmp, i32 1, i32 %. + ret i32 %retval.0 +} + +define i32 @ucmp_zero_ult_neg(i32 %a) { +; CHECK-LABEL: define i32 @ucmp_zero_ult_neg( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %cmp = icmp eq i32 %a, 0 + %cmp1.inv = icmp ult i32 %a, 1 + %. 
= select i1 %cmp1.inv, i32 -1, i32 1 + %retval.0 = select i1 %cmp, i32 0, i32 %. + ret i32 %retval.0 +} + +define i32 @ucmp_zero_ugt_neg(i32 %a) { +; CHECK-LABEL: define i32 @ucmp_zero_ugt_neg( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %cmp = icmp eq i32 %a, 0 + %cmp1.inv = icmp ugt i32 %a, -1 + %. = select i1 %cmp1.inv, i32 1, i32 -1 + %retval.0 = select i1 %cmp, i32 0, i32 %. + ret i32 %retval.0 +} + +define i32 @scmp_sgt_slt_ab(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @scmp_sgt_slt_ab( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %cmp = icmp sgt i32 %a, %b + %cmp1 = icmp slt i32 %a, %b + %. = select i1 %cmp1, i32 -1, i32 0 + %retval.0 = select i1 %cmp, i32 1, i32 %. + ret i32 %retval.0 +} + +define i32 @scmp_zero_slt_ab(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @scmp_zero_slt_ab( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %cmp = icmp eq i32 %a, %b + %cmp1.inv = icmp slt i32 %a, %b + %. = select i1 %cmp1.inv, i32 -1, i32 1 + %retval.0 = select i1 %cmp, i32 0, i32 %. + ret i32 %retval.0 +} + +define i32 @scmp_zero_sgt_ab(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @scmp_zero_sgt_ab( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %cmp = icmp eq i32 %a, %b + %cmp1.inv = icmp sgt i32 %a, %b + %. = select i1 %cmp1.inv, i32 1, i32 -1 + %retval.0 = select i1 %cmp, i32 0, i32 %. 
+ ret i32 %retval.0 +} + ; Negative test: true value of outer select is not zero define i8 @scmp_from_select_eq_and_gt_neg1(i32 %x, i32 %y) { ; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt_neg1( diff --git a/llvm/test/Transforms/SCCP/constant-range-struct.ll b/llvm/test/Transforms/SCCP/constant-range-struct.ll index 7a399df..0f45b38 100644 --- a/llvm/test/Transforms/SCCP/constant-range-struct.ll +++ b/llvm/test/Transforms/SCCP/constant-range-struct.ll @@ -25,7 +25,7 @@ true: br label %exit false: - %s.3 = insertvalue {i64, i64} undef, i64 30, 0 + %s.3 = insertvalue {i64, i64} poison, i64 30, 0 %s.4 = insertvalue {i64, i64} %s.3, i64 300, 1 br label %exit @@ -39,14 +39,14 @@ define void @struct1_caller() { ; CHECK-NEXT: [[S:%.*]] = call { i64, i64 } @struct1() ; CHECK-NEXT: [[V1:%.*]] = extractvalue { i64, i64 } [[S]], 0 ; CHECK-NEXT: [[V2:%.*]] = extractvalue { i64, i64 } [[S]], 1 -; CHECK-NEXT: [[T_1:%.*]] = icmp ne i64 [[V1]], 10 -; CHECK-NEXT: call void @use(i1 [[T_1]]) -; CHECK-NEXT: [[T_2:%.*]] = icmp ult i64 [[V1]], 100 -; CHECK-NEXT: call void @use(i1 [[T_2]]) -; CHECK-NEXT: [[T_3:%.*]] = icmp ne i64 [[V2]], 0 +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: [[T_3:%.*]] = icmp eq i64 [[V1]], 20 ; CHECK-NEXT: call void @use(i1 [[T_3]]) -; CHECK-NEXT: [[T_4:%.*]] = icmp ult i64 [[V2]], 301 -; CHECK-NEXT: call void @use(i1 [[T_4]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: [[T_6:%.*]] = icmp eq i64 [[V2]], 300 +; CHECK-NEXT: call void @use(i1 [[T_6]]) ; CHECK-NEXT: ret void ; %s = call {i64, i64} @struct1() @@ -57,10 +57,14 @@ define void @struct1_caller() { call void @use(i1 %t.1) %t.2 = icmp ult i64 %v1, 100 call void @use(i1 %t.2) - %t.3 = icmp ne i64 %v2, 0 + %t.3 = icmp eq i64 %v1, 20 call void @use(i1 %t.3) - %t.4 = icmp ult i64 %v2, 301 + %t.4 = icmp ne i64 %v2, 0 call void @use(i1 %t.4) + %t.5 = icmp ult i64 %v2, 301 + call void @use(i1 %t.5) + %t.6 = icmp eq i64 %v2, 300 + call void @use(i1 %t.6) ret void } @@ -76,7 +80,7 @@ define internal {i64, i64} @struct2() { ; CHECK: exit: ; CHECK-NEXT: [[V1:%.*]] = phi i64 [ 20, [[TRUE]] ], [ 30, [[FALSE]] ] ; CHECK-NEXT: [[V2:%.*]] = phi i64 [ 200, [[TRUE]] ], [ 300, [[FALSE]] ] -; CHECK-NEXT: [[S_1:%.*]] = insertvalue { i64, i64 } undef, i64 [[V1]], 0 +; CHECK-NEXT: [[S_1:%.*]] = insertvalue { i64, i64 } poison, i64 [[V1]], 0 ; CHECK-NEXT: [[S_2:%.*]] = insertvalue { i64, i64 } [[S_1]], i64 [[V2]], 1 ; CHECK-NEXT: ret { i64, i64 } [[S_2]] ; @@ -92,7 +96,7 @@ false: exit: %v1 = phi i64 [ 20, %true ], [ 30, %false ] %v2 = phi i64 [ 200, %true ], [ 300, %false ] - %s.1 = insertvalue {i64, i64} undef, i64 %v1, 0 + %s.1 = insertvalue {i64, i64} poison, i64 %v1, 0 %s.2 = insertvalue {i64, i64} %s.1, i64 %v2, 1 ret {i64, i64} %s.2 } @@ -153,3 +157,40 @@ define void @struct2_caller() { ret void } + +%"phi_type" = type {i64, i64} + +define internal %"phi_type" @test(i32 %input) { +; CHECK-LABEL: @test( +; CHECK-NEXT: br label [[COND_TRUE_I:%.*]] +; CHECK: cond.true.i: +; CHECK-NEXT: br label [[COND_END_I:%.*]] +; CHECK: cond.end.i: +; CHECK-NEXT: ret [[PHI_TYPE:%.*]] poison +; + %cmp.cond = icmp eq i32 %input, 1 + br i1 %cmp.cond, label %cond.true.i, label %cond.false.i + +cond.true.i: + %r1.tmp = insertvalue %"phi_type" poison, i64 1, 0 + %r1.tmp.2 = insertvalue %"phi_type" %r1.tmp, i64 2, 1 + br label %cond.end.i + +cond.false.i: + %r2.tmp = insertvalue %"phi_type" poison, i64 3, 0 + %r2.tmp.2 = insertvalue %"phi_type" %r2.tmp, i64 4, 1 + br 
label %cond.end.i + +cond.end.i: + %retval = phi %"phi_type" [ %r1.tmp.2, %cond.true.i ], [ %r2.tmp.2, %cond.false.i ] + ret %"phi_type" %retval +} + +define %"phi_type" @test2() { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call fastcc [[PHI_TYPE:%.*]] @[[TEST:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 noundef 1) +; CHECK-NEXT: ret [[PHI_TYPE]] { i64 1, i64 2 } +; + %call.1 = tail call fastcc noundef %"phi_type" @test(i32 noundef 1) + ret %"phi_type" %call.1 +} diff --git a/llvm/test/tools/llvm-reduce/reduce-instructions-alloca.ll b/llvm/test/tools/llvm-reduce/reduce-instructions-alloca.ll new file mode 100644 index 0000000..94b45d2 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-instructions-alloca.ll @@ -0,0 +1,16 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions --test FileCheck --test-arg --check-prefixes=CHECK,INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefixes=CHECK,RESULT %s < %t + +; CHECK-LABEL: define void @alloca( +; INTERESTING: call void @llvm.lifetime.start.p0( +; INTERESTING: call void @llvm.lifetime.end.p0( + +; RESULT: call void @llvm.lifetime.start.p0(ptr poison) +; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr poison) +; RESULT-NEXT: ret void +define void @alloca(ptr %ptr) { + %alloca = alloca i32, align 4 + call void @llvm.lifetime.start.p0(ptr %alloca) + call void @llvm.lifetime.end.p0(ptr %alloca) + ret void +} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp index f1f5d6b..19b69e8 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp @@ -13,6 +13,8 @@ #include "ReduceInstructions.h" #include "Utils.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" using namespace llvm; @@ -37,7 +39,9 @@ void llvm::reduceInstructionsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { for (auto &Inst : make_early_inc_range(make_range(BB.begin(), std::prev(BB.end())))) { if (!shouldAlwaysKeep(Inst) && !O.shouldKeep()) { - Inst.replaceAllUsesWith(getDefaultValue(Inst.getType())); + Inst.replaceAllUsesWith(isa<AllocaInst>(Inst) + ? PoisonValue::get(Inst.getType()) + : getDefaultValue(Inst.getType())); Inst.eraseFromParent(); } } diff --git a/llvm/unittests/ADT/BitTest.cpp b/llvm/unittests/ADT/BitTest.cpp index eaed4e1..5b3df91 100644 --- a/llvm/unittests/ADT/BitTest.cpp +++ b/llvm/unittests/ADT/BitTest.cpp @@ -270,6 +270,22 @@ TEST(BitTest, BitWidthConstexpr) { llvm::bit_width_constexpr(std::numeric_limits<uint64_t>::max()) == 64); } +TEST(BitTest, BitCeilConstexpr) { + static_assert(llvm::bit_ceil_constexpr(0u) == 1); + static_assert(llvm::bit_ceil_constexpr(1u) == 1); + static_assert(llvm::bit_ceil_constexpr(2u) == 2); + static_assert(llvm::bit_ceil_constexpr(3u) == 4); + static_assert(llvm::bit_ceil_constexpr(4u) == 4); + static_assert(llvm::bit_ceil_constexpr(5u) == 8); + static_assert(llvm::bit_ceil_constexpr(6u) == 8); + static_assert(llvm::bit_ceil_constexpr(7u) == 8); + static_assert(llvm::bit_ceil_constexpr(8u) == 8); + + static_assert(llvm::bit_ceil_constexpr(255u) == 256); + static_assert(llvm::bit_ceil_constexpr(256u) == 256); + static_assert(llvm::bit_ceil_constexpr(257u) == 512); +} + TEST(BitTest, CountlZero) { uint8_t Z8 = 0; uint16_t Z16 = 0; |