Diffstat (limited to 'llvm')
122 files changed, 3646 insertions, 851 deletions
diff --git a/llvm/include/llvm/ADT/Bitfields.h b/llvm/include/llvm/ADT/Bitfields.h index 4064d71..1af2761 100644 --- a/llvm/include/llvm/ADT/Bitfields.h +++ b/llvm/include/llvm/ADT/Bitfields.h @@ -86,89 +86,43 @@ #include <limits> // numeric_limits #include <type_traits> +#include "llvm/Support/MathExtras.h" + namespace llvm { namespace bitfields_details { -/// A struct defining useful bit patterns for n-bits integer types. -template <typename T, unsigned Bits> struct BitPatterns { - /// Bit patterns are forged using the equivalent `Unsigned` type because of - /// undefined operations over signed types (e.g. Bitwise shift operators). - /// Moreover same size casting from unsigned to signed is well defined but not - /// the other way around. - using Unsigned = std::make_unsigned_t<T>; - static_assert(sizeof(Unsigned) == sizeof(T), "Types must have same size"); - - static constexpr unsigned TypeBits = sizeof(Unsigned) * CHAR_BIT; - static_assert(TypeBits >= Bits, "n-bit must fit in T"); - - /// e.g. with TypeBits == 8 and Bits == 6. - static constexpr Unsigned AllZeros = Unsigned(0); // 00000000 - static constexpr Unsigned AllOnes = ~Unsigned(0); // 11111111 - static constexpr Unsigned Umin = AllZeros; // 00000000 - static constexpr Unsigned Umax = AllOnes >> (TypeBits - Bits); // 00111111 - static constexpr Unsigned SignBitMask = Unsigned(1) << (Bits - 1); // 00100000 - static constexpr Unsigned Smax = Umax >> 1U; // 00011111 - static constexpr Unsigned Smin = ~Smax; // 11100000 - static constexpr Unsigned SignExtend = Unsigned(Smin << 1U); // 11000000 -}; - -/// `Compressor` is used to manipulate the bits of a (possibly signed) integer -/// type so it can be packed and unpacked into a `bits` sized integer, -/// `Compressor` is specialized on signed-ness so no runtime cost is incurred. -/// The `pack` method also checks that the passed in `UserValue` is valid. -template <typename T, unsigned Bits, bool = std::is_unsigned<T>::value> -struct Compressor { - static_assert(std::is_unsigned<T>::value, "T must be unsigned"); - using BP = BitPatterns<T, Bits>; - - static T pack(T UserValue, T UserMaxValue) { - assert(UserValue <= UserMaxValue && "value is too big"); - assert(UserValue <= BP::Umax && "value is too big"); - return UserValue; - } - - static T unpack(T StorageValue) { return StorageValue; } -}; - -template <typename T, unsigned Bits> struct Compressor<T, Bits, false> { - static_assert(std::is_signed<T>::value, "T must be signed"); - using BP = BitPatterns<T, Bits>; - - static T pack(T UserValue, T UserMaxValue) { - assert(UserValue <= UserMaxValue && "value is too big"); - assert(UserValue <= T(BP::Smax) && "value is too big"); - assert(UserValue >= T(BP::Smin) && "value is too small"); - if (UserValue < 0) - UserValue &= ~BP::SignExtend; - return UserValue; - } - - static T unpack(T StorageValue) { - if (StorageValue >= T(BP::SignBitMask)) - StorageValue |= BP::SignExtend; - return StorageValue; - } -}; - /// Impl is where Bifield description and Storage are put together to interact /// with values. 
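The rewritten Impl that follows replaces the deleted BitPatterns/Compressor machinery with two MathExtras helpers: maskTrailingOnes builds the field mask, and SignExtend64 recovers signed values on extraction. A minimal self-contained sketch of the same pack/extract round-trip, as a standalone reimplementation for illustration rather than the actual Impl template:

```cpp
#include <cassert>
#include <cstdint>

// Pack a signed value into a Bits-wide field at offset Shift, then recover
// it with sign extension. LowMask plays the role of
// llvm::maskTrailingOnes<StorageType>(Bits); the extract path mirrors
// llvm::SignExtend64<Bits>.
template <unsigned Bits, unsigned Shift>
uint64_t update(uint64_t Packed, int64_t UserValue) {
  static_assert(Bits > 0 && Bits < 64 && Bits + Shift <= 64, "must fit");
  const uint64_t LowMask = (uint64_t(1) << Bits) - 1;
  const uint64_t Mask = LowMask << Shift;
  Packed &= ~Mask;                                    // clear the field
  Packed |= (uint64_t(UserValue) & LowMask) << Shift; // truncate and store
  return Packed;
}

template <unsigned Bits, unsigned Shift>
int64_t extract(uint64_t Packed) {
  const uint64_t LowMask = (uint64_t(1) << Bits) - 1;
  const uint64_t Storage = (Packed >> Shift) & LowMask;
  const uint64_t SignBit = uint64_t(1) << (Bits - 1);
  return int64_t((Storage ^ SignBit) - SignBit); // sign-extend Bits -> 64
}

int main() {
  uint64_t P = update<6, 3>(0, -5); // store -5 in a 6-bit field at bit 3
  assert(extract<6, 3>(P) == -5);
  assert(extract<6, 3>(update<6, 3>(P, 20)) == 20);
}
```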
template <typename Bitfield, typename StorageType> struct Impl { static_assert(std::is_unsigned<StorageType>::value, "Storage must be unsigned"); using IntegerType = typename Bitfield::IntegerType; - using C = Compressor<IntegerType, Bitfield::Bits>; - using BP = BitPatterns<StorageType, Bitfield::Bits>; static constexpr size_t StorageBits = sizeof(StorageType) * CHAR_BIT; static_assert(Bitfield::FirstBit <= StorageBits, "Data must fit in mask"); static_assert(Bitfield::LastBit <= StorageBits, "Data must fit in mask"); - static constexpr StorageType Mask = BP::Umax << Bitfield::Shift; + static constexpr StorageType LowMask = + maskTrailingOnes<StorageType>(Bitfield::Bits); + static constexpr StorageType Mask = LowMask << Bitfield::Shift; + + /// Validates that `UserValue` fits within the bitfield's range. + static void checkValue(IntegerType UserValue, IntegerType UserMaxValue) { + assert(UserValue <= UserMaxValue && "value is too big"); + if constexpr (std::is_unsigned_v<IntegerType>) { + assert(isUInt<Bitfield::Bits>(UserValue) && "value is too big"); + } else { + static_assert(std::is_signed_v<IntegerType>, + "IntegerType must be signed"); + assert(isInt<Bitfield::Bits>(UserValue) && "value is out of range"); + } + } /// Checks `UserValue` is within bounds and packs it between `FirstBit` and /// `LastBit` of `Packed` leaving the rest unchanged. static void update(StorageType &Packed, IntegerType UserValue) { - const StorageType StorageValue = C::pack(UserValue, Bitfield::UserMaxValue); + checkValue(UserValue, Bitfield::UserMaxValue); + const StorageType StorageValue = UserValue & LowMask; Packed &= ~Mask; Packed |= StorageValue << Bitfield::Shift; } @@ -177,7 +131,9 @@ template <typename Bitfield, typename StorageType> struct Impl { /// an`IntegerType`. static IntegerType extract(StorageType Packed) { const StorageType StorageValue = (Packed & Mask) >> Bitfield::Shift; - return C::unpack(StorageValue); + if constexpr (std::is_signed_v<IntegerType>) + return SignExtend64<Bitfield::Bits>(StorageValue); + return StorageValue; } /// Interprets bits between `FirstBit` and `LastBit` of `Packed` as diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h index 164b46b..07a482d 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h @@ -182,6 +182,12 @@ m_scev_PtrToInt(const Op0_t &Op0) { return SCEVUnaryExpr_match<SCEVPtrToIntExpr, Op0_t>(Op0); } +template <typename Op0_t> +inline SCEVUnaryExpr_match<SCEVTruncateExpr, Op0_t> +m_scev_Trunc(const Op0_t &Op0) { + return m_scev_Unary<SCEVTruncateExpr>(Op0); +} + /// Match a binary SCEV. template <typename SCEVTy, typename Op0_t, typename Op1_t, SCEV::NoWrapFlags WrapFlags = SCEV::FlagAnyWrap, diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h index fa21eba..f06e7ce 100644 --- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -10,6 +10,24 @@ namespace llvm { +namespace memprof { +// Represents the eligibility status of a global variable for section prefix +// annotation. Other than AnnotationOk, each enum value indicates a specific +// reason for ineligibility. +enum class AnnotationKind : uint8_t { + AnnotationOK, + DeclForLinker, + ExplicitSection, + ReservedName, +}; +/// Returns the annotation kind of the global variable \p GV. 
+AnnotationKind getAnnotationKind(const GlobalVariable &GV); + +/// Returns true if the annotation kind of the global variable \p GV is +/// AnnotationOK. +bool IsAnnotationOK(const GlobalVariable &GV); +} // namespace memprof + /// A class that holds the constants that represent static data and their /// profile information and provides methods to operate on them. class StaticDataProfileInfo { diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index c76c83d..ff3dd0d 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -514,6 +514,12 @@ enum NodeType { /// separately rounded operations. FMAD, + /// FMULADD - Performs a * b + c, with, or without, intermediate rounding. + /// It is expected that this will be illegal for most targets, as it usually + /// makes sense to split this or use an FMA. But some targets, such as + /// WebAssembly, can directly support these semantics. + FMULADD, + /// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This /// DAG node does not require that X and Y have the same type, just that /// they are both floating point. X and the result must have the same type. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 1694a33..46b3d53 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -472,7 +472,7 @@ __OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr, KernelLaunchEn __OMP_RTL(__kmpc_target_deinit, false, Void,) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, - FuncPtrTy, VoidPtr, VoidPtrPtr, SizeTy) + FuncPtrTy, FuncPtrTy, VoidPtrPtr, SizeTy) __OMP_RTL(__kmpc_for_static_loop_4, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8) __OMP_RTL(__kmpc_for_static_loop_4u, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8) __OMP_RTL(__kmpc_for_static_loop_8, false, Void, IdentPtr, VoidPtr, VoidPtr, Int64, Int64, Int64, Int8) diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h index 10467cc..e772095 100644 --- a/llvm/include/llvm/IR/ConstantFPRange.h +++ b/llvm/include/llvm/IR/ConstantFPRange.h @@ -231,6 +231,15 @@ public: /// from a subtraction of a value in this range and a value in \p Other. LLVM_ABI ConstantFPRange sub(const ConstantFPRange &Other) const; + /// Return a new range representing the possible values resulting + /// from a multiplication of a value in this range and a value in \p Other. + LLVM_ABI ConstantFPRange mul(const ConstantFPRange &Other) const; + + /// Return a new range representing the possible values resulting + /// from a division of a value in this range and a value in + /// \p Other. + LLVM_ABI ConstantFPRange div(const ConstantFPRange &Other) const; + /// Flush denormal values to zero according to the specified mode. /// For dynamic mode, we return the union of all possible results. 
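A usage sketch for the memprof helpers declared above; the pass body is hypothetical, and only getAnnotationKind/IsAnnotationOK come from the new header:

```cpp
#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Hypothetical caller: decide per global whether a section prefix may be
// attached. Declarations, "llvm."-reserved names, and explicit sections are
// reported as distinct ineligibility kinds by getAnnotationKind.
static void annotateGlobals(Module &M) {
  for (GlobalVariable &GV : M.globals()) {
    if (!memprof::IsAnnotationOK(GV))
      continue;
    // ... consult profile counts and set a hot/cold section prefix ...
  }
}
```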
LLVM_ABI void flushDenormals(DenormalMode::DenormalModeKind Mode); diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 041a4ce..dacda0a 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2548,6 +2548,11 @@ public: std::optional<RoundingMode> Rounding = std::nullopt, std::optional<fp::ExceptionBehavior> Except = std::nullopt); + LLVM_ABI Value *CreateSelectWithUnknownProfile(Value *C, Value *True, + Value *False, + StringRef PassName, + const Twine &Name = ""); + LLVM_ABI Value *CreateSelect(Value *C, Value *True, Value *False, const Twine &Name = "", Instruction *MDFrom = nullptr); diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 632be7a..07a858f 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -535,6 +535,7 @@ def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>; def frem : SDNode<"ISD::FREM" , SDTFPBinOp>; def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp, [SDNPCommutative]>; def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp, [SDNPCommutative]>; +def fmuladd : SDNode<"ISD::FMULADD" , SDTFPTernaryOp, [SDNPCommutative]>; def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>; def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; diff --git a/llvm/include/llvm/TargetParser/RISCVTargetParser.h b/llvm/include/llvm/TargetParser/RISCVTargetParser.h index b1fca55..2ac58a5 100644 --- a/llvm/include/llvm/TargetParser/RISCVTargetParser.h +++ b/llvm/include/llvm/TargetParser/RISCVTargetParser.h @@ -161,6 +161,8 @@ inline static bool isAltFmt(unsigned VType) { return VType & 0x100; } LLVM_ABI void printVType(unsigned VType, raw_ostream &OS); +LLVM_ABI void printXSfmmVType(unsigned VType, raw_ostream &OS); + LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul); LLVM_ABI std::optional<VLMUL> getSameRatioLMUL(unsigned SEW, VLMUL VLMUL, diff --git a/llvm/include/llvm/Transforms/Coroutines/MaterializationUtils.h b/llvm/include/llvm/Transforms/Coroutines/MaterializationUtils.h index 558984f..eb2b34d 100644 --- a/llvm/include/llvm/Transforms/Coroutines/MaterializationUtils.h +++ b/llvm/include/llvm/Transforms/Coroutines/MaterializationUtils.h @@ -12,9 +12,7 @@ #ifndef LLVM_TRANSFORMS_COROUTINES_MATERIALIZATIONUTILS_H #define LLVM_TRANSFORMS_COROUTINES_MATERIALIZATIONUTILS_H -namespace llvm { - -namespace coro { +namespace llvm::coro { // True if I is trivially rematerialzable, e.g. 
InsertElementInst LLVM_ABI bool isTriviallyMaterializable(Instruction &I); @@ -24,8 +22,6 @@ LLVM_ABI void doRematerializations(Function &F, SuspendCrossingInfo &Checker, std::function<bool(Instruction &)> IsMaterializable); -} // namespace coro - -} // namespace llvm +} // namespace llvm::coro #endif // LLVM_TRANSFORMS_COROUTINES_MATERIALIZATIONUTILS_H diff --git a/llvm/include/llvm/Transforms/Coroutines/SpillUtils.h b/llvm/include/llvm/Transforms/Coroutines/SpillUtils.h index 6cdf83c0..356f9ca 100644 --- a/llvm/include/llvm/Transforms/Coroutines/SpillUtils.h +++ b/llvm/include/llvm/Transforms/Coroutines/SpillUtils.h @@ -13,9 +13,7 @@ #ifndef LLVM_TRANSFORMS_COROUTINES_SPILLINGINFO_H #define LLVM_TRANSFORMS_COROUTINES_SPILLINGINFO_H -namespace llvm { - -namespace coro { +namespace llvm::coro { using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>; @@ -38,6 +36,7 @@ void collectSpillsAndAllocasFromInsts( SmallVector<CoroAllocaAllocInst *, 4> &LocalAllocas, Function &F, const SuspendCrossingInfo &Checker, const DominatorTree &DT, const coro::Shape &Shape); + void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, const SuspendCrossingInfo &Checker); @@ -52,8 +51,6 @@ void sinkSpillUsesAfterCoroBegin(const DominatorTree &DT, BasicBlock::iterator getSpillInsertionPt(const coro::Shape &, Value *Def, const DominatorTree &DT); -} // namespace coro - -} // namespace llvm +} // namespace llvm::coro #endif // LLVM_TRANSFORMS_COROUTINES_SPILLINGINFO_H diff --git a/llvm/include/llvm/XRay/BlockIndexer.h b/llvm/include/llvm/XRay/BlockIndexer.h index e9782da..155e6bd 100644 --- a/llvm/include/llvm/XRay/BlockIndexer.h +++ b/llvm/include/llvm/XRay/BlockIndexer.h @@ -19,8 +19,7 @@ #include <cstdint> #include <vector> -namespace llvm { -namespace xray { +namespace llvm::xray { // The BlockIndexer will gather all related records associated with a // process+thread and group them by 'Block'. 
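The long run of XRay (and coroutine) header hunks in this patch is mechanical: each collapses a two-level namespace into the C++17 nested-namespace-definition shorthand. The two spellings declare exactly the same entities:

```cpp
// Pre-C++17 spelling:
namespace llvm {
namespace xray {
class RecordVisitor;
} // namespace xray
} // namespace llvm

// C++17 shorthand used throughout these hunks; redeclares the same class
// in the same namespace, so both forms are interchangeable.
namespace llvm::xray {
class RecordVisitor;
} // namespace llvm::xray
```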
@@ -63,7 +62,6 @@ public: Error flush(); }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_BLOCKINDEXER_H diff --git a/llvm/include/llvm/XRay/BlockPrinter.h b/llvm/include/llvm/XRay/BlockPrinter.h index caf78c5..81944a5 100644 --- a/llvm/include/llvm/XRay/BlockPrinter.h +++ b/llvm/include/llvm/XRay/BlockPrinter.h @@ -18,8 +18,7 @@ #include "llvm/XRay/FDRRecords.h" #include "llvm/XRay/RecordPrinter.h" -namespace llvm { -namespace xray { +namespace llvm::xray { class LLVM_ABI BlockPrinter : public RecordVisitor { enum class State { @@ -55,7 +54,6 @@ public: void reset() { CurrentState = State::Start; } }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_BLOCKPRINTER_H diff --git a/llvm/include/llvm/XRay/BlockVerifier.h b/llvm/include/llvm/XRay/BlockVerifier.h index b88785c..5e7b25c 100644 --- a/llvm/include/llvm/XRay/BlockVerifier.h +++ b/llvm/include/llvm/XRay/BlockVerifier.h @@ -16,8 +16,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/XRay/FDRRecords.h" -namespace llvm { -namespace xray { +namespace llvm::xray { class LLVM_ABI BlockVerifier : public RecordVisitor { public: @@ -64,7 +63,6 @@ public: void reset(); }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_BLOCKVERIFIER_H diff --git a/llvm/include/llvm/XRay/FDRLogBuilder.h b/llvm/include/llvm/XRay/FDRLogBuilder.h index f07c446..5f7b815 100644 --- a/llvm/include/llvm/XRay/FDRLogBuilder.h +++ b/llvm/include/llvm/XRay/FDRLogBuilder.h @@ -10,8 +10,7 @@ #include "llvm/XRay/FDRRecords.h" -namespace llvm { -namespace xray { +namespace llvm::xray { /// The LogBuilder class allows for creating ad-hoc collections of records /// through the `add<...>(...)` function. An example use of this API is in @@ -34,7 +33,6 @@ public: std::vector<std::unique_ptr<Record>> consume() { return std::move(Records); } }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_FDRLOGBUILDER_H diff --git a/llvm/include/llvm/XRay/FDRRecordConsumer.h b/llvm/include/llvm/XRay/FDRRecordConsumer.h index 473777f..13bb711 100644 --- a/llvm/include/llvm/XRay/FDRRecordConsumer.h +++ b/llvm/include/llvm/XRay/FDRRecordConsumer.h @@ -15,8 +15,7 @@ #include <memory> #include <vector> -namespace llvm { -namespace xray { +namespace llvm::xray { class RecordConsumer { public: @@ -48,7 +47,6 @@ public: Error consume(std::unique_ptr<Record> R) override; }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_FDRRECORDCONSUMER_H diff --git a/llvm/include/llvm/XRay/FDRRecordProducer.h b/llvm/include/llvm/XRay/FDRRecordProducer.h index 083b571..b953f62 100644 --- a/llvm/include/llvm/XRay/FDRRecordProducer.h +++ b/llvm/include/llvm/XRay/FDRRecordProducer.h @@ -14,8 +14,7 @@ #include "llvm/XRay/XRayRecord.h" #include <memory> -namespace llvm { -namespace xray { +namespace llvm::xray { class RecordProducer { public: @@ -45,7 +44,6 @@ public: Expected<std::unique_ptr<Record>> produce() override; }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_FDRRECORDPRODUCER_H diff --git a/llvm/include/llvm/XRay/FDRRecords.h b/llvm/include/llvm/XRay/FDRRecords.h index 7ee8db6..91689cae 100644 --- a/llvm/include/llvm/XRay/FDRRecords.h +++ b/llvm/include/llvm/XRay/FDRRecords.h @@ -23,8 +23,7 @@ #include "llvm/Support/Error.h" #include "llvm/XRay/XRayRecord.h" -namespace llvm { -namespace xray { +namespace llvm::xray { class RecordVisitor; class RecordInitializer; @@ 
-444,7 +443,6 @@ public: Error visit(TypedEventRecord &) override; }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_FDRRECORDS_H diff --git a/llvm/include/llvm/XRay/FDRTraceExpander.h b/llvm/include/llvm/XRay/FDRTraceExpander.h index 197c123..ca400c9 100644 --- a/llvm/include/llvm/XRay/FDRTraceExpander.h +++ b/llvm/include/llvm/XRay/FDRTraceExpander.h @@ -17,8 +17,7 @@ #include "llvm/XRay/FDRRecords.h" #include "llvm/XRay/XRayRecord.h" -namespace llvm { -namespace xray { +namespace llvm::xray { class TraceExpander : public RecordVisitor { // Type-erased callback for handling individual XRayRecord instances. @@ -56,7 +55,6 @@ public: Error flush(); }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_FDRTRACEEXPANDER_H diff --git a/llvm/include/llvm/XRay/FDRTraceWriter.h b/llvm/include/llvm/XRay/FDRTraceWriter.h index a3dc58e..957039d 100644 --- a/llvm/include/llvm/XRay/FDRTraceWriter.h +++ b/llvm/include/llvm/XRay/FDRTraceWriter.h @@ -18,8 +18,7 @@ #include "llvm/XRay/FDRRecords.h" #include "llvm/XRay/XRayRecord.h" -namespace llvm { -namespace xray { +namespace llvm::xray { /// The FDRTraceWriter allows us to hand-craft an XRay Flight Data Recorder /// (FDR) mode log file. This is used primarily for testing, generating @@ -50,7 +49,6 @@ private: support::endian::Writer OS; }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_FDRTRACEWRITER_H diff --git a/llvm/include/llvm/XRay/FileHeaderReader.h b/llvm/include/llvm/XRay/FileHeaderReader.h index ecdb975..758ca29 100644 --- a/llvm/include/llvm/XRay/FileHeaderReader.h +++ b/llvm/include/llvm/XRay/FileHeaderReader.h @@ -19,15 +19,13 @@ #include "llvm/XRay/XRayRecord.h" #include <cstdint> -namespace llvm { -namespace xray { +namespace llvm::xray { /// Convenience function for loading the file header given a data extractor at a /// specified offset. LLVM_ABI Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor, uint64_t &OffsetPtr); -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_FILEHEADERREADER_H diff --git a/llvm/include/llvm/XRay/Graph.h b/llvm/include/llvm/XRay/Graph.h index 07b418b..8521e09 100644 --- a/llvm/include/llvm/XRay/Graph.h +++ b/llvm/include/llvm/XRay/Graph.h @@ -23,8 +23,7 @@ #include "llvm/ADT/iterator.h" #include "llvm/Support/Error.h" -namespace llvm { -namespace xray { +namespace llvm::xray { /// A Graph object represents a Directed Graph and is used in XRay to compute /// and store function call graphs and associated statistical information. @@ -485,6 +484,6 @@ public: return p; } }; -} -} +} // namespace llvm::xray + #endif diff --git a/llvm/include/llvm/XRay/InstrumentationMap.h b/llvm/include/llvm/XRay/InstrumentationMap.h index b5371478..c5e7ebf 100644 --- a/llvm/include/llvm/XRay/InstrumentationMap.h +++ b/llvm/include/llvm/XRay/InstrumentationMap.h @@ -23,9 +23,7 @@ #include <unordered_map> #include <vector> -namespace llvm { - -namespace xray { +namespace llvm::xray { // Forward declare to make a friend. 
class InstrumentationMap; @@ -102,11 +100,11 @@ public: const SledContainer &sleds() const { return Sleds; }; }; -} // end namespace xray - -namespace yaml { +} // end namespace llvm::xray -template <> struct ScalarEnumerationTraits<xray::SledEntry::FunctionKinds> { +namespace llvm { +template <> +struct yaml::ScalarEnumerationTraits<xray::SledEntry::FunctionKinds> { static void enumeration(IO &IO, xray::SledEntry::FunctionKinds &Kind) { IO.enumCase(Kind, "function-enter", xray::SledEntry::FunctionKinds::ENTRY); IO.enumCase(Kind, "function-exit", xray::SledEntry::FunctionKinds::EXIT); @@ -118,7 +116,7 @@ template <> struct ScalarEnumerationTraits<xray::SledEntry::FunctionKinds> { } }; -template <> struct MappingTraits<xray::YAMLXRaySledEntry> { +template <> struct yaml::MappingTraits<xray::YAMLXRaySledEntry> { static void mapping(IO &IO, xray::YAMLXRaySledEntry &Entry) { IO.mapRequired("id", Entry.FuncId); IO.mapRequired("address", Entry.Address); @@ -131,10 +129,7 @@ template <> struct MappingTraits<xray::YAMLXRaySledEntry> { static constexpr bool flow = true; }; - -} // end namespace yaml - -} // end namespace llvm +} // namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(xray::YAMLXRaySledEntry) diff --git a/llvm/include/llvm/XRay/Profile.h b/llvm/include/llvm/XRay/Profile.h index e30c01e..b5b8dd2 100644 --- a/llvm/include/llvm/XRay/Profile.h +++ b/llvm/include/llvm/XRay/Profile.h @@ -22,8 +22,7 @@ #include <utility> #include <vector> -namespace llvm { -namespace xray { +namespace llvm::xray { class Profile; @@ -144,7 +143,6 @@ public: bool empty() const { return Blocks.empty(); } }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif diff --git a/llvm/include/llvm/XRay/RecordPrinter.h b/llvm/include/llvm/XRay/RecordPrinter.h index 5d2c277..3281221 100644 --- a/llvm/include/llvm/XRay/RecordPrinter.h +++ b/llvm/include/llvm/XRay/RecordPrinter.h @@ -17,8 +17,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/XRay/FDRRecords.h" -namespace llvm { -namespace xray { +namespace llvm::xray { class LLVM_ABI RecordPrinter : public RecordVisitor { raw_ostream &OS; @@ -44,7 +43,6 @@ public: Error visit(TypedEventRecord &) override; }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_RECORDPRINTER_H diff --git a/llvm/include/llvm/XRay/Trace.h b/llvm/include/llvm/XRay/Trace.h index 5e4e40a..13ada22 100644 --- a/llvm/include/llvm/XRay/Trace.h +++ b/llvm/include/llvm/XRay/Trace.h @@ -21,8 +21,7 @@ #include "llvm/Support/Error.h" #include "llvm/XRay/XRayRecord.h" -namespace llvm { -namespace xray { +namespace llvm::xray { /// A Trace object represents the records that have been loaded from XRay /// log files generated by instrumented binaries. 
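The YAML-trait hunks here (and in YAMLXRayRecord.h below) additionally lean on the C++17 rule that an explicit specialization may be defined in an enclosing namespace using a qualified name, so the separate `namespace yaml { ... }` block can be dropped. A minimal sketch, with MyType as a hypothetical payload type:

```cpp
#include "llvm/Support/YAMLTraits.h"

namespace llvm {
struct MyType { int X; }; // hypothetical

// Specialized from the enclosing namespace via a qualified name instead of
// reopening llvm::yaml (valid since C++17).
template <> struct yaml::MappingTraits<MyType> {
  static void mapping(IO &IO, MyType &Val) { IO.mapRequired("x", Val.X); }
};
} // namespace llvm
```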
We encapsulate the logic of @@ -76,7 +75,6 @@ LLVM_ABI Expected<Trace> loadTraceFile(StringRef Filename, bool Sort = false); LLVM_ABI Expected<Trace> loadTrace(const DataExtractor &Extractor, bool Sort = false); -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_TRACE_H diff --git a/llvm/include/llvm/XRay/XRayRecord.h b/llvm/include/llvm/XRay/XRayRecord.h index 238bf3d..8f3440c 100644 --- a/llvm/include/llvm/XRay/XRayRecord.h +++ b/llvm/include/llvm/XRay/XRayRecord.h @@ -18,8 +18,7 @@ #include <vector> #include <string> -namespace llvm { -namespace xray { +namespace llvm::xray { /// XRay traces all have a header providing some top-matter information useful /// to help tools determine how to interpret the information available in the @@ -98,7 +97,6 @@ struct XRayRecord { std::string Data; }; -} // namespace xray -} // namespace llvm +} // namespace llvm::xray #endif // LLVM_XRAY_XRAYRECORD_H diff --git a/llvm/include/llvm/XRay/YAMLXRayRecord.h b/llvm/include/llvm/XRay/YAMLXRayRecord.h index 6062606..6bf4f1d 100644 --- a/llvm/include/llvm/XRay/YAMLXRayRecord.h +++ b/llvm/include/llvm/XRay/YAMLXRayRecord.h @@ -17,8 +17,7 @@ #include "llvm/Support/YAMLTraits.h" #include "llvm/XRay/XRayRecord.h" -namespace llvm { -namespace xray { +namespace llvm::xray { struct YAMLXRayFileHeader { uint16_t Version; @@ -46,13 +45,12 @@ struct YAMLXRayTrace { std::vector<YAMLXRayRecord> Records; }; -} // namespace xray - -namespace yaml { +} // namespace llvm::xray +namespace llvm { // YAML Traits // ----------- -template <> struct ScalarEnumerationTraits<xray::RecordTypes> { +template <> struct yaml::ScalarEnumerationTraits<xray::RecordTypes> { static void enumeration(IO &IO, xray::RecordTypes &Type) { IO.enumCase(Type, "function-enter", xray::RecordTypes::ENTER); IO.enumCase(Type, "function-exit", xray::RecordTypes::EXIT); @@ -63,7 +61,7 @@ template <> struct ScalarEnumerationTraits<xray::RecordTypes> { } }; -template <> struct MappingTraits<xray::YAMLXRayFileHeader> { +template <> struct yaml::MappingTraits<xray::YAMLXRayFileHeader> { static void mapping(IO &IO, xray::YAMLXRayFileHeader &Header) { IO.mapRequired("version", Header.Version); IO.mapRequired("type", Header.Type); @@ -73,7 +71,7 @@ template <> struct MappingTraits<xray::YAMLXRayFileHeader> { } }; -template <> struct MappingTraits<xray::YAMLXRayRecord> { +template <> struct yaml::MappingTraits<xray::YAMLXRayRecord> { static void mapping(IO &IO, xray::YAMLXRayRecord &Record) { IO.mapRequired("type", Record.RecordType); IO.mapOptional("func-id", Record.FuncId); @@ -90,7 +88,7 @@ template <> struct MappingTraits<xray::YAMLXRayRecord> { static constexpr bool flow = true; }; -template <> struct MappingTraits<xray::YAMLXRayTrace> { +template <> struct yaml::MappingTraits<llvm::xray::YAMLXRayTrace> { static void mapping(IO &IO, xray::YAMLXRayTrace &Trace) { // A trace file contains two parts, the header and the list of all the // trace records. 
@@ -98,8 +96,6 @@ template <> struct MappingTraits<xray::YAMLXRayTrace> { IO.mapRequired("records", Trace.Records); } }; - -} // namespace yaml } // namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(xray::YAMLXRayRecord) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index b5b4cd9..00c3dbb 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -5419,20 +5419,15 @@ static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI, if (SourceBits != NewBits) return nullptr; - const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op); - const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op); - if (!SExt && !ZExt) - return nullptr; - const SCEVTruncateExpr *Trunc = - SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand()) - : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand()); - if (!Trunc) - return nullptr; - const SCEV *X = Trunc->getOperand(); - if (X != SymbolicPHI) - return nullptr; - Signed = SExt != nullptr; - return Trunc->getType(); + if (match(Op, m_scev_SExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) { + Signed = true; + return cast<SCEVCastExpr>(Op)->getOperand()->getType(); + } + if (match(Op, m_scev_ZExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) { + Signed = false; + return cast<SCEVCastExpr>(Op)->getOperand()->getType(); + } + return nullptr; } static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { @@ -15428,20 +15423,18 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, // Try to match 'zext (trunc A to iB) to iY', which is used // for URem with constant power-of-2 second operands. Make sure the size of // the operand A matches the size of the whole expressions. - if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr)) - if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) { - LHS = Trunc->getOperand(); - // Bail out if the type of the LHS is larger than the type of the - // expression for now. - if (getTypeSizeInBits(LHS->getType()) > - getTypeSizeInBits(Expr->getType())) - return false; - if (LHS->getType() != Expr->getType()) - LHS = getZeroExtendExpr(LHS, Expr->getType()); - RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1) - << getTypeSizeInBits(Trunc->getType())); - return true; - } + if (match(Expr, m_scev_ZExt(m_scev_Trunc(m_SCEV(LHS))))) { + Type *TruncTy = cast<SCEVZeroExtendExpr>(Expr)->getOperand()->getType(); + // Bail out if the type of the LHS is larger than the type of the + // expression for now. 
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(Expr->getType())) + return false; + if (LHS->getType() != Expr->getType()) + LHS = getZeroExtendExpr(LHS, Expr->getType()); + RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1) + << getTypeSizeInBits(TruncTy)); + return true; + } const auto *Add = dyn_cast<SCEVAddExpr>(Expr); if (Add == nullptr || Add->getNumOperands() != 2) return false; diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index b036b2d..1f751ee 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -6,6 +6,46 @@ #include "llvm/ProfileData/InstrProf.h" using namespace llvm; + +namespace llvm { +namespace memprof { +// Returns true iff the global variable has custom section either by +// __attribute__((section("name"))) +// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate) +// or #pragma clang section directives +// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section). +static bool hasExplicitSectionName(const GlobalVariable &GVar) { + if (GVar.hasSection()) + return true; + + auto Attrs = GVar.getAttributes(); + if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") || + Attrs.hasAttribute("relro-section") || + Attrs.hasAttribute("rodata-section")) + return true; + return false; +} + +AnnotationKind getAnnotationKind(const GlobalVariable &GV) { + if (GV.isDeclarationForLinker()) + return AnnotationKind::DeclForLinker; + // Skip 'llvm.'-prefixed global variables conservatively because they are + // often handled specially, + StringRef Name = GV.getName(); + if (Name.starts_with("llvm.")) + return AnnotationKind::ReservedName; + // Respect user-specified custom data sections. 
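Both ScalarEvolution.cpp rewrites above compose the new m_scev_Trunc with existing matchers to replace nested dyn_cast chains. A sketch of the idiom; the wrapper function is hypothetical, while the matchers come from ScalarEvolutionPatternMatch.h:

```cpp
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
using namespace llvm;
using namespace llvm::SCEVPatternMatch;

// Recognize zext(trunc(X)) in one shot and bind X on success, instead of
// two nested dyn_cast checks as in the code being deleted above.
static bool isZExtOfTrunc(const SCEV *Expr, const SCEV *&X) {
  return match(Expr, m_scev_ZExt(m_scev_Trunc(m_SCEV(X))));
}
```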
+ if (hasExplicitSectionName(GV)) + return AnnotationKind::ExplicitSection; + return AnnotationKind::AnnotationOK; +} + +bool IsAnnotationOK(const GlobalVariable &GV) { + return getAnnotationKind(GV) == AnnotationKind::AnnotationOK; +} +} // namespace memprof +} // namespace llvm + void StaticDataProfileInfo::addConstantProfileCount( const Constant *C, std::optional<uint64_t> Count) { if (!Count) { diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp index 6356d71..873ac8f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -20,7 +20,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -namespace llvm { +using namespace llvm; AIXException::AIXException(AsmPrinter *A) : EHStreamer(A) {} @@ -90,5 +90,3 @@ void AIXException::endFunction(const MachineFunction *MF) { emitExceptionInfoTable(LSDALabel, PerSym); } - -} // End of namespace llvm diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index 260ce8f..93ae548 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -85,8 +85,7 @@ template <> struct llvm::DenseMapInfo<VariableID> { using VarLocInsertPt = PointerUnion<const Instruction *, const DbgRecord *>; -namespace std { -template <> struct hash<VarLocInsertPt> { +template <> struct std::hash<VarLocInsertPt> { using argument_type = VarLocInsertPt; using result_type = std::size_t; @@ -94,7 +93,6 @@ template <> struct hash<VarLocInsertPt> { return std::hash<void *>()(Arg.getOpaqueValue()); } }; -} // namespace std /// Helper class to build FunctionVarLocs, since that class isn't easy to /// modify. 
TODO: There's not a great deal of value in the split, it could be diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp index fd7df6b..47b7a88 100644 --- a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp +++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp @@ -207,9 +207,7 @@ bool ApplyCloning(MachineFunction &MF, } return AnyPathsCloned; } -} // end anonymous namespace -namespace llvm { class BasicBlockPathCloning : public MachineFunctionPass { public: static char ID; @@ -229,7 +227,7 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; }; -} // namespace llvm +} // namespace char BasicBlockPathCloning::ID = 0; INITIALIZE_PASS_BEGIN( diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 28e6728..1846880 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -31,7 +31,7 @@ using namespace llvm; -namespace llvm { +namespace { class BreakFalseDeps : public MachineFunctionPass { private: @@ -95,7 +95,7 @@ private: void processUndefReads(MachineBasicBlock *); }; -} // namespace llvm +} // namespace #define DEBUG_TYPE "break-false-deps" diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index 6c2a5a7..87ada87 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -126,8 +126,7 @@ hash_code hash_value(const ComplexValue &Arg) { } // end namespace typedef SmallVector<struct ComplexValue, 2> ComplexValues; -namespace llvm { -template <> struct DenseMapInfo<ComplexValue> { +template <> struct llvm::DenseMapInfo<ComplexValue> { static inline ComplexValue getEmptyKey() { return {DenseMapInfo<Value *>::getEmptyKey(), DenseMapInfo<Value *>::getEmptyKey()}; @@ -144,7 +143,6 @@ template <> struct DenseMapInfo<ComplexValue> { return LHS.Real == RHS.Real && LHS.Imag == RHS.Imag; } }; -} // end namespace llvm namespace { template <typename T, typename IterT> diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp index f4335396..50dd66f 100644 --- a/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/llvm/lib/CodeGen/EdgeBundles.cpp @@ -81,13 +81,10 @@ void EdgeBundles::init() { } } -namespace llvm { - /// Specialize WriteGraph, the standard implementation won't work. -template<> -raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, - bool ShortNames, - const Twine &Title) { +template <> +raw_ostream &llvm::WriteGraph<>(raw_ostream &O, const EdgeBundles &G, + bool ShortNames, const Twine &Title) { const MachineFunction *MF = G.getMachineFunction(); O << "digraph {\n"; @@ -107,8 +104,6 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, return O; } -} // end namespace llvm - /// view - Visualize the annotated bipartite CFG with Graphviz. 
void EdgeBundles::view() const { ViewGraph(*this, "EdgeBundles"); diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index c500357..04c7008 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -1036,6 +1036,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI, continue; addToWorklist(I, Worklist); + Modified = true; break; } default: diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index 47640c4a..81ab317 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -587,16 +587,12 @@ public: } // namespace char GlobalMergeFuncPassWrapper::ID = 0; -INITIALIZE_PASS_BEGIN(GlobalMergeFuncPassWrapper, "global-merge-func", - "Global merge function pass", false, false) -INITIALIZE_PASS_END(GlobalMergeFuncPassWrapper, "global-merge-func", - "Global merge function pass", false, false) +INITIALIZE_PASS(GlobalMergeFuncPassWrapper, "global-merge-func", + "Global merge function pass", false, false) -namespace llvm { -ModulePass *createGlobalMergeFuncPass() { +ModulePass *llvm::createGlobalMergeFuncPass() { return new GlobalMergeFuncPassWrapper(); } -} // namespace llvm GlobalMergeFuncPassWrapper::GlobalMergeFuncPassWrapper() : ModulePass(ID) { initializeGlobalMergeFuncPassWrapperPass( diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 3485a27..0e38017 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -101,15 +101,11 @@ static cl::opt<bool> EnablePrecomputePhysRegs( static bool EnablePrecomputePhysRegs = false; #endif // NDEBUG -namespace llvm { - -cl::opt<bool> UseSegmentSetForPhysRegs( +cl::opt<bool> llvm::UseSegmentSetForPhysRegs( "use-segment-set-for-physregs", cl::Hidden, cl::init(true), cl::desc( "Use segment set for the computation of the live ranges of physregs.")); -} // end namespace llvm - void LiveIntervalsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<LiveVariablesWrapperPass>(); diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp index e859765..5c78d98 100644 --- a/llvm/lib/CodeGen/MIR2Vec.cpp +++ b/llvm/lib/CodeGen/MIR2Vec.cpp @@ -29,20 +29,17 @@ using namespace mir2vec; STATISTIC(MIRVocabMissCounter, "Number of lookups to MIR entities not present in the vocabulary"); -namespace llvm { -namespace mir2vec { -cl::OptionCategory MIR2VecCategory("MIR2Vec Options"); +cl::OptionCategory llvm::mir2vec::MIR2VecCategory("MIR2Vec Options"); // FIXME: Use a default vocab when not specified static cl::opt<std::string> VocabFile("mir2vec-vocab-path", cl::Optional, cl::desc("Path to the vocabulary file for MIR2Vec"), cl::init(""), cl::cat(MIR2VecCategory)); -cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0), - cl::desc("Weight for machine opcode embeddings"), - cl::cat(MIR2VecCategory)); -} // namespace mir2vec -} // namespace llvm +cl::opt<float> + llvm::mir2vec::OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0), + cl::desc("Weight for machine opcode embeddings"), + cl::cat(MIR2VecCategory)); //===----------------------------------------------------------------------===// // Vocabulary Implementation diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp index f5146f5..d988a2a 100644 --- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp +++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -40,7 +40,7 @@ cl::opt<bool> 
ImprovedFSDiscriminator( "improved-fs-discriminator", cl::Hidden, cl::init(false), cl::desc("New FS discriminators encoding (incompatible with the original " "encoding)")); -} +} // namespace llvm char MIRAddFSDiscriminators::ID = 0; diff --git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp index bc65700..cbf8867 100644 --- a/llvm/lib/CodeGen/MIRNamerPass.cpp +++ b/llvm/lib/CodeGen/MIRNamerPass.cpp @@ -23,10 +23,6 @@ using namespace llvm; -namespace llvm { -extern char &MIRNamerID; -} // namespace llvm - #define DEBUG_TYPE "mir-namer" namespace { @@ -53,10 +49,9 @@ public: VRegRenamer Renamer(MF.getRegInfo()); - unsigned BBIndex = 0; ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); - for (auto &MBB : RPOT) - Changed |= Renamer.renameVRegs(MBB, BBIndex++); + for (const auto &[BBIndex, MBB] : enumerate(RPOT)) + Changed |= Renamer.renameVRegs(MBB, BBIndex); return Changed; } @@ -66,10 +61,4 @@ public: char MIRNamer::ID; -char &llvm::MIRNamerID = MIRNamer::ID; - -INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false, - false) - -INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false, - false) +INITIALIZE_PASS(MIRNamer, "mir-namer", "Rename Register Operands", false, false) diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index bf8a6cd..96428cd 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -107,10 +107,8 @@ struct MFPrintState { } // end anonymous namespace -namespace llvm::yaml { - /// This struct serializes the LLVM IR module. -template <> struct BlockScalarTraits<Module> { +template <> struct yaml::BlockScalarTraits<Module> { static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) { Mod.print(OS, nullptr); } @@ -121,8 +119,6 @@ template <> struct BlockScalarTraits<Module> { } }; -} // end namespace llvm::yaml - static void printRegMIR(Register Reg, yaml::StringValue &Dest, const TargetRegisterInfo *TRI) { raw_string_ostream OS(Dest.Value); diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp index b2731b69..a72c2c4 100644 --- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp @@ -97,7 +97,9 @@ static const bool EnableDevelopmentFeatures = false; /// this happens only in development mode. It's a no-op otherwise. 
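The MIRNamer hunk above swaps a manually incremented BBIndex for llvm::enumerate, which yields (index, element) pairs directly. The pattern in isolation, with a hypothetical container:

```cpp
#include "llvm/ADT/STLExtras.h"
#include <vector>

int sumWeighted(const std::vector<int> &Xs) {
  int Sum = 0;
  // Structured bindings expose the running index alongside the element,
  // replacing the old `unsigned I = 0; ... I++` counter.
  for (const auto &[Index, Value] : llvm::enumerate(Xs))
    Sum += int(Index) * Value;
  return Sum;
}
```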
namespace llvm { extern cl::opt<unsigned> EvictInterferenceCutoff; +} // namespace llvm +namespace { class RegAllocScoring : public MachineFunctionPass { public: static char ID; @@ -124,11 +126,12 @@ public: /// Performs this pass bool runOnMachineFunction(MachineFunction &) override; }; +} // namespace char RegAllocScoring::ID = 0; -FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); } - -} // namespace llvm +FunctionPass *llvm::createRegAllocScoringPass() { + return new RegAllocScoring(); +} INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass", "Register Allocation Scoring Pass", false, false) diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index e7fa082..26eb10f 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -29,7 +29,6 @@ using namespace llvm; #define DEBUG_TYPE "machine-block-freq" -namespace llvm { static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( "view-machine-block-freq-propagation-dags", cl::Hidden, cl::desc("Pop up a window to show a dag displaying how machine block " @@ -44,6 +43,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available."))); +namespace llvm { // Similar option above, but used to control BFI display only after MBP pass cl::opt<GVDAGType> ViewBlockLayoutWithBFI( "view-block-layout-with-bfi", cl::Hidden, @@ -69,15 +69,15 @@ extern cl::opt<std::string> ViewBlockFreqFuncName; // Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc= extern cl::opt<unsigned> ViewHotFreqPercent; -static cl::opt<bool> PrintMachineBlockFreq( - "print-machine-bfi", cl::init(false), cl::Hidden, - cl::desc("Print the machine block frequency info.")); - // Command line option to specify the name of the function for block frequency // dump. Defined in Analysis/BlockFrequencyInfo.cpp. 
extern cl::opt<std::string> PrintBFIFuncName; } // namespace llvm +static cl::opt<bool> + PrintMachineBlockFreq("print-machine-bfi", cl::init(false), cl::Hidden, + cl::desc("Print the machine block frequency info.")); + static GVDAGType getGVDT() { if (ViewBlockLayoutWithBFI != GVDT_None) return ViewBlockLayoutWithBFI; @@ -85,9 +85,7 @@ static GVDAGType getGVDT() { return ViewMachineBlockFreqPropagationDAG; } -namespace llvm { - -template <> struct GraphTraits<MachineBlockFrequencyInfo *> { +template <> struct llvm::GraphTraits<MachineBlockFrequencyInfo *> { using NodeRef = const MachineBasicBlock *; using ChildIteratorType = MachineBasicBlock::const_succ_iterator; using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>; @@ -116,7 +114,7 @@ using MBFIDOTGraphTraitsBase = MachineBranchProbabilityInfo>; template <> -struct DOTGraphTraits<MachineBlockFrequencyInfo *> +struct llvm::DOTGraphTraits<MachineBlockFrequencyInfo *> : public MBFIDOTGraphTraitsBase { const MachineFunction *CurFunc = nullptr; DenseMap<const MachineBasicBlock *, int> LayoutOrderMap; @@ -159,8 +157,6 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *> } }; -} // end namespace llvm - AnalysisKey MachineBlockFrequencyAnalysis::Key; MachineBlockFrequencyAnalysis::Result diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 2e92dd8..7ca4582 100644 --- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -18,13 +18,8 @@ using namespace llvm; -INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfoWrapperPass, - "machine-branch-prob", - "Machine Branch Probability Analysis", false, true) -INITIALIZE_PASS_END(MachineBranchProbabilityInfoWrapperPass, - "machine-branch-prob", - "Machine Branch Probability Analysis", false, true) - +INITIALIZE_PASS(MachineBranchProbabilityInfoWrapperPass, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) namespace llvm { cl::opt<unsigned> StaticLikelyProb("static-likely-prob", diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 224231c..bfa5ab2 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -719,43 +719,41 @@ MachineFunction::CallSiteInfo::CallSiteInfo(const CallBase &CB) { } } -namespace llvm { +template <> +struct llvm::DOTGraphTraits<const MachineFunction *> + : public DefaultDOTGraphTraits { + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} - template<> - struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits { - DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + static std::string getGraphName(const MachineFunction *F) { + return ("CFG for '" + F->getName() + "' function").str(); + } - static std::string getGraphName(const MachineFunction *F) { - return ("CFG for '" + F->getName() + "' function").str(); + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineFunction *Graph) { + std::string OutStr; + { + raw_string_ostream OSS(OutStr); + + if (isSimple()) { + OSS << printMBBReference(*Node); + if (const BasicBlock *BB = Node->getBasicBlock()) + OSS << ": " << BB->getName(); + } else + Node->print(OSS); } - std::string getNodeLabel(const MachineBasicBlock *Node, - const MachineFunction *Graph) { - std::string OutStr; - { - raw_string_ostream OSS(OutStr); - - if (isSimple()) { - OSS << printMBBReference(*Node); - if (const BasicBlock *BB = 
Node->getBasicBlock()) - OSS << ": " << BB->getName(); - } else - Node->print(OSS); - } - - if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); - - // Process string output to make it nicer... - for (unsigned i = 0; i != OutStr.length(); ++i) - if (OutStr[i] == '\n') { // Left justify - OutStr[i] = '\\'; - OutStr.insert(OutStr.begin()+i+1, 'l'); - } - return OutStr; - } - }; + if (OutStr[0] == '\n') + OutStr.erase(OutStr.begin()); -} // end namespace llvm + // Process string output to make it nicer... + for (unsigned i = 0; i != OutStr.length(); ++i) + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin() + i + 1, 'l'); + } + return OutStr; + } +}; void MachineFunction::viewCFG() const { diff --git a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index 0f88a7b..5111322 100644 --- a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -60,13 +60,11 @@ char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer", "Machine Function Printer", false, false) -namespace llvm { /// Returns a newly-created MachineFunction Printer pass. The /// default banner is empty. /// -MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS, - const std::string &Banner){ +MachineFunctionPass * +llvm::createMachineFunctionPrinterPass(raw_ostream &OS, + const std::string &Banner) { return new MachineFunctionPrinterPass(OS, Banner); } - -} diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index fdae3b4..9feb974 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -593,15 +593,12 @@ struct MachineOutliner : public ModulePass { char MachineOutliner::ID = 0; -namespace llvm { -ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode) { +ModulePass *llvm::createMachineOutlinerPass(RunOutliner RunOutlinerMode) { MachineOutliner *OL = new MachineOutliner(); OL->RunOutlinerMode = RunOutlinerMode; return OL; } -} // namespace llvm - INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 89ed4da..a717d9e 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -201,16 +201,15 @@ static cl::opt<unsigned> SwpMaxNumStores( cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden, cl::init(200)); -namespace llvm { - // A command line option to enable the CopyToPhi DAG mutation. -cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, - cl::init(true), - cl::desc("Enable CopyToPhi DAG Mutation")); +cl::opt<bool> + llvm::SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, + cl::init(true), + cl::desc("Enable CopyToPhi DAG Mutation")); /// A command line argument to force pipeliner to use specified issue /// width. 
-cl::opt<int> SwpForceIssueWidth( +cl::opt<int> llvm::SwpForceIssueWidth( "pipeliner-force-issue-width", cl::desc("Force pipeliner to use specified issue width."), cl::Hidden, cl::init(-1)); @@ -226,8 +225,6 @@ static cl::opt<WindowSchedulingFlag> WindowSchedulingOption( clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm."))); -} // end namespace llvm - unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5; char MachinePipeliner::ID = 0; #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 299bcc4..3ed1045 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -176,9 +176,7 @@ STATISTIC(NumNodeOrderPostRA, STATISTIC(NumFirstValidPostRA, "Number of scheduling units chosen for FirstValid heuristic post-RA"); -namespace llvm { - -cl::opt<MISched::Direction> PreRADirection( +cl::opt<MISched::Direction> llvm::PreRADirection( "misched-prera-direction", cl::Hidden, cl::desc("Pre reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), @@ -206,33 +204,31 @@ static cl::opt<bool> DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout")); -cl::opt<bool> VerifyScheduling( +cl::opt<bool> llvm::VerifyScheduling( "verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling")); #ifndef NDEBUG -cl::opt<bool> ViewMISchedDAGs( +cl::opt<bool> llvm::ViewMISchedDAGs( "view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed")); -cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden, - cl::desc("Print schedule DAGs")); -cl::opt<bool> MISchedDumpReservedCycles( +cl::opt<bool> llvm::PrintDAGs("misched-print-dags", cl::Hidden, + cl::desc("Print schedule DAGs")); +static cl::opt<bool> MISchedDumpReservedCycles( "misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary.")); -cl::opt<bool> MischedDetailResourceBooking( +static cl::opt<bool> MischedDetailResourceBooking( "misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResoufceCycle.")); #else -const bool ViewMISchedDAGs = false; -const bool PrintDAGs = false; -const bool MischedDetailResourceBooking = false; +const bool llvm::ViewMISchedDAGs = false; +const bool llvm::PrintDAGs = false; +static const bool MischedDetailResourceBooking = false; #ifdef LLVM_ENABLE_DUMP -const bool MISchedDumpReservedCycles = false; +static const bool MISchedDumpReservedCycles = false; #endif // LLVM_ENABLE_DUMP #endif // NDEBUG -} // end namespace llvm - #ifndef NDEBUG /// In some situations a few uninteresting nodes depend on nearly all other /// nodes in the graph, provide a cutoff to hide them. @@ -2053,28 +2049,24 @@ public: } // end anonymous namespace -namespace llvm { - std::unique_ptr<ScheduleDAGMutation> -createLoadClusterDAGMutation(const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - bool ReorderWhileClustering) { +llvm::createLoadClusterDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + bool ReorderWhileClustering) { return EnableMemOpCluster ? 
std::make_unique<LoadClusterMutation>( TII, TRI, ReorderWhileClustering) : nullptr; } std::unique_ptr<ScheduleDAGMutation> -createStoreClusterDAGMutation(const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - bool ReorderWhileClustering) { +llvm::createStoreClusterDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + bool ReorderWhileClustering) { return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>( TII, TRI, ReorderWhileClustering) : nullptr; } -} // end namespace llvm - // Sorting all the loads/stores first, then for each load/store, checking the // following load/store one by one, until reach the first non-dependent one and // call target hook to see if they can cluster. @@ -2304,16 +2296,12 @@ protected: } // end anonymous namespace -namespace llvm { - std::unique_ptr<ScheduleDAGMutation> -createCopyConstrainDAGMutation(const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI) { +llvm::createCopyConstrainDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { return std::make_unique<CopyConstrain>(TII, TRI); } -} // end namespace llvm - /// constrainLocalCopy handles two possibilities: /// 1) Local src: /// I0: = dst @@ -3445,14 +3433,13 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { } #endif -namespace llvm { /// Return true if this heuristic determines order. /// TODO: Consider refactor return type of these functions as integer or enum, /// as we may need to differentiate whether TryCand is better than Cand. -bool tryLess(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { +bool llvm::tryLess(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; @@ -3465,10 +3452,10 @@ bool tryLess(int TryVal, int CandVal, return false; } -bool tryGreater(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { +bool llvm::tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; @@ -3481,9 +3468,9 @@ bool tryGreater(int TryVal, int CandVal, return false; } -bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - SchedBoundary &Zone) { +bool llvm::tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone) { if (Zone.isTop()) { // Prefer the candidate with the lesser depth, but only if one of them has // depth greater than the total latency scheduled so far, otherwise either @@ -3513,7 +3500,6 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, } return false; } -} // end namespace llvm static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop, bool IsPostRA = false) { @@ -3798,14 +3784,12 @@ void GenericScheduler::registerRoots() { } } -namespace llvm { -bool tryPressure(const PressureChange &TryP, - const PressureChange &CandP, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason, - const TargetRegisterInfo 
*TRI, - const MachineFunction &MF) { +bool llvm::tryPressure(const PressureChange &TryP, const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) { // If one candidate decreases and the other increases, go with it. // Invalid candidates have UnitInc==0. if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, @@ -3838,7 +3822,7 @@ bool tryPressure(const PressureChange &TryP, return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); } -unsigned getWeakLeft(const SUnit *SU, bool isTop) { +unsigned llvm::getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } @@ -3849,7 +3833,7 @@ unsigned getWeakLeft(const SUnit *SU, bool isTop) { /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled /// with the operation that produces or consumes the physreg. We'll do this when /// regalloc has support for parallel copies. -int biasPhysReg(const SUnit *SU, bool isTop) { +int llvm::biasPhysReg(const SUnit *SU, bool isTop) { const MachineInstr *MI = SU->getInstr(); if (MI->isCopy()) { @@ -3884,7 +3868,6 @@ int biasPhysReg(const SUnit *SU, bool isTop) { return 0; } -} // end namespace llvm void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, @@ -4812,13 +4795,13 @@ static MachineSchedRegistry ShufflerRegistry( //===----------------------------------------------------------------------===// #ifndef NDEBUG -namespace llvm { -template<> struct GraphTraits< - ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; +template <> +struct llvm::GraphTraits<ScheduleDAGMI *> : public GraphTraits<ScheduleDAG *> { +}; -template<> -struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { +template <> +struct llvm::DOTGraphTraits<ScheduleDAGMI *> : public DefaultDOTGraphTraits { DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const ScheduleDAG *G) { @@ -4878,7 +4861,6 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { } }; -} // end namespace llvm #endif // NDEBUG /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index c2d4aa0..9ac3f741 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -485,10 +485,7 @@ struct LoopBounds { // Specialize po_iterator_storage in order to prune the post-order traversal so // it is limited to the current loop and doesn't traverse the loop back edges. -namespace llvm { - -template<> -class po_iterator_storage<LoopBounds, true> { +template <> class llvm::po_iterator_storage<LoopBounds, true> { LoopBounds &LB; public: @@ -519,8 +516,6 @@ public: } }; -} // end namespace llvm - /// Compute the trace through MBB. 
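The hunks above and below apply the same mechanical cleanup: since LLVM's move to C++17, a member of namespace llvm, including an explicit template specialization such as GraphTraits or po_iterator_storage, can be defined at global scope with a qualified name instead of reopening the namespace. A minimal sketch of the pattern (MyGraph is an illustrative placeholder, not a type from the tree):

    namespace llvm {
    template <typename GraphType> struct GraphTraits; // primary template
    } // end namespace llvm

    struct MyGraph;

    // Before: reopen the namespace to define the specialization.
    namespace llvm {
    template <> struct GraphTraits<MyGraph *> { /* ... */ };
    } // end namespace llvm

    // After (C++17): define it with a qualified name; no reopening needed.
    template <> struct llvm::GraphTraits<MyGraph *> { /* ... */ };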
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through " diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp index 087ac62..59c587c 100644 --- a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp +++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -9,7 +9,7 @@ #include "llvm/CodeGen/NonRelocatableStringpool.h" #include "llvm/ADT/STLExtras.h" -namespace llvm { +using namespace llvm; DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { auto I = Strings.try_emplace(S); @@ -43,5 +43,3 @@ NonRelocatableStringpool::getEntriesForEmission() const { }); return Result; } - -} // namespace llvm diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index 6f373a5..e9ffa85 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -76,8 +76,6 @@ using namespace llvm::safestack; #define DEBUG_TYPE "safe-stack" -namespace llvm { - STATISTIC(NumFunctions, "Total number of functions"); STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack"); STATISTIC(NumUnsafeStackRestorePointsFunctions, @@ -89,8 +87,6 @@ STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas"); STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments"); STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads"); -} // namespace llvm - /// Use __safestack_pointer_address even if the platform has a faster way of /// accessing the safe stack pointer. static cl::opt<bool> diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index eae2e8c..3268c26 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1551,14 +1551,10 @@ LLVM_DUMP_METHOD void ILPValue::dump() const { dbgs() << *this << '\n'; } -namespace llvm { - LLVM_ATTRIBUTE_UNUSED -raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) { Val.print(OS); return OS; } -} // end namespace llvm - #endif diff --git a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index e7b1494..c80eade 100644 --- a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -16,57 +16,51 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace llvm { - template<> - struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits { +template <> +struct llvm::DOTGraphTraits<ScheduleDAG *> : public DefaultDOTGraphTraits { - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} - static std::string getGraphName(const ScheduleDAG *G) { - return std::string(G->MF.getName()); - } + static std::string getGraphName(const ScheduleDAG *G) { + return std::string(G->MF.getName()); + } - static bool renderGraphFromBottomUp() { - return true; - } + static bool renderGraphFromBottomUp() { return true; } - static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { - return (Node->NumPreds > 10 || Node->NumSuccs > 10); - } + static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } - static std::string getNodeIdentifierLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - std::string R; - raw_string_ostream OS(R); - OS << static_cast<const void
*>(Node); - return R; - } + static std::string getNodeIdentifierLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + std::string R; + raw_string_ostream OS(R); + OS << static_cast<const void *>(Node); + return R; + } - /// If you want to override the dot attributes printed for a particular - /// edge, override this method. - static std::string getEdgeAttributes(const SUnit *Node, - SUnitIterator EI, - const ScheduleDAG *Graph) { - if (EI.isArtificialDep()) - return "color=cyan,style=dashed"; - if (EI.isCtrlDep()) - return "color=blue,style=dashed"; - return ""; - } + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph); + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + return "shape=Mrecord"; + } - std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph); - static std::string getNodeAttributes(const SUnit *N, - const ScheduleDAG *Graph) { - return "shape=Mrecord"; - } - - static void addCustomGraphFeatures(ScheduleDAG *G, - GraphWriter<ScheduleDAG*> &GW) { - return G->addCustomGraphFeatures(GW); - } - }; -} + static void addCustomGraphFeatures(ScheduleDAG *G, + GraphWriter<ScheduleDAG *> &GW) { + return G->addCustomGraphFeatures(GW); + } +}; std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b1accdd..e153842 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -509,6 +509,7 @@ namespace { SDValue visitFMUL(SDNode *N); template <class MatchContextClass> SDValue visitFMA(SDNode *N); SDValue visitFMAD(SDNode *N); + SDValue visitFMULADD(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); @@ -1991,6 +1992,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA<EmptyMatchContext>(N); case ISD::FMAD: return visitFMAD(N); + case ISD::FMULADD: return visitFMULADD(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); @@ -18444,6 +18446,21 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFMULADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Constant fold FMULADD. + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2})) + return C; + + return SDValue(); +} + // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. 
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2b8dd60..4512c5c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5786,6 +5786,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::FCOPYSIGN: case ISD::FMA: case ISD::FMAD: + case ISD::FMULADD: case ISD::FP_EXTEND: case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: @@ -5904,6 +5905,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, case ISD::FCOSH: case ISD::FTANH: case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (SNaN) return true; @@ -7231,7 +7233,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } // Handle fma/fmad special cases. - if (Opcode == ISD::FMA || Opcode == ISD::FMAD) { + if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && Ops[2].getValueType() == VT && "FMA types must match!"); @@ -7242,7 +7244,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, APFloat V1 = C1->getValueAPF(); const APFloat &V2 = C2->getValueAPF(); const APFloat &V3 = C3->getValueAPF(); - if (Opcode == ISD::FMAD) { + if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { V1.multiply(V2, APFloat::rmNearestTiesToEven); V1.add(V3, APFloat::rmNearestTiesToEven); } else diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c21890a..0f2b518 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6996,6 +6996,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), Flags)); + } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) { + // TODO: Support splitting the vector. + setValue(&I, DAG.getNode(ISD::FMULADD, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls should have fast-math-flags. 
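// Neither FMA nor a legal FMULADD was available, so expand fmuladd into an unfused multiply followed by an add.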
SDValue Mul = DAG.getNode( diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcfbfe6..39cbfad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -310,6 +310,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMA: return "fma"; case ISD::STRICT_FMA: return "strict_fma"; case ISD::FMAD: return "fmad"; + case ISD::FMULADD: return "fmuladd"; case ISD::FREM: return "frem"; case ISD::STRICT_FREM: return "strict_frem"; case ISD::FCOPYSIGN: return "fcopysign"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cc503d3..920dff9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7676,6 +7676,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, break; } case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (!Flags.hasNoSignedZeros()) break; diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp index 64e5cd5..95a9c3f 100644 --- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp +++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp @@ -306,10 +306,7 @@ char &llvm::StackFrameLayoutAnalysisPassID = StackFrameLayoutAnalysisLegacy::ID; INITIALIZE_PASS(StackFrameLayoutAnalysisLegacy, "stack-frame-layout", "Stack Frame Layout", false, false) -namespace llvm { /// Returns a newly-created StackFrameLayout pass. -MachineFunctionPass *createStackFrameLayoutAnalysisPass() { +MachineFunctionPass *llvm::createStackFrameLayoutAnalysisPass() { return new StackFrameLayoutAnalysisLegacy(); } - -} // namespace llvm diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp index 53a9ab4..eac20120 100644 --- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -75,22 +75,11 @@ bool StaticDataAnnotator::runOnModule(Module &M) { bool Changed = false; for (auto &GV : M.globals()) { - if (GV.isDeclarationForLinker()) + if (!llvm::memprof::IsAnnotationOK(GV)) continue; - // The implementation below assumes prior passes don't set section prefixes, - // and specifically do 'assign' rather than 'update'. So report error if a - // section prefix is already set. - if (auto maybeSectionPrefix = GV.getSectionPrefix(); - maybeSectionPrefix && !maybeSectionPrefix->empty()) - llvm::report_fatal_error("Global variable " + GV.getName() + - " already has a section prefix " + - *maybeSectionPrefix); - StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI); - if (SectionPrefix.empty()) - continue; - + // setSectionPrefix returns true if the section prefix is updated. Changed |= GV.setSectionPrefix(SectionPrefix); } diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index e22dc25..1593a40 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -130,10 +130,8 @@ StaticDataSplitter::getConstant(const MachineOperand &Op, if (Op.isGlobal()) { // Find global variables with local linkage. const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal()); - // Skip 'llvm.'-prefixed global variables conservatively because they are - // often handled specially, and skip those not in static data - // sections. 
- if (!GV || GV->getName().starts_with("llvm.") + // Skip those not eligible for annotation or not in static data sections. + if (!GV || !llvm::memprof::IsAnnotationOK(*GV) || !inStaticDataSection(*GV, TM)) return nullptr; return GV; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index c23281a..060b1dd 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -815,7 +815,8 @@ void TargetLoweringBase::initActions() { ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH, ISD::FSINH, - ISD::FTANH, ISD::FATAN2}, + ISD::FTANH, ISD::FATAN2, + ISD::FMULADD}, VT, Expand); // Overflow operations default to expand diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index c9e4618..971f822 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -102,10 +102,8 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, return true; } -namespace llvm { - -Printable printReg(Register Reg, const TargetRegisterInfo *TRI, - unsigned SubIdx, const MachineRegisterInfo *MRI) { +Printable llvm::printReg(Register Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx, const MachineRegisterInfo *MRI) { return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) { if (!Reg) OS << "$noreg"; @@ -135,7 +133,7 @@ Printable printReg(Register Reg, const TargetRegisterInfo *TRI, }); } -Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { +Printable llvm::printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { // Generic printout when TRI is missing. if (!TRI) { @@ -158,7 +156,7 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { +Printable llvm::printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { if (Register::isVirtualRegister(Unit)) { OS << '%' << Register(Unit).virtRegIndex(); @@ -168,8 +166,9 @@ Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo, - const TargetRegisterInfo *TRI) { +Printable llvm::printRegClassOrBank(Register Reg, + const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) { if (RegInfo.getRegClassOrNull(Reg)) OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); @@ -183,8 +182,6 @@ Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo, }); } -} // end namespace llvm - /// getAllocatableClass - Return the maximal subclass of the given register /// class that is allocatable, or NULL. const TargetRegisterClass * diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index e9c058e..5b87686 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -528,3 +528,147 @@ void ConstantFPRange::flushDenormals(DenormalMode::DenormalModeKind Mode) { Lower = minnum(Lower, APFloat::getZero(Sem, ZeroLowerNegative)); Upper = maxnum(Upper, APFloat::getZero(Sem, ZeroUpperNegative)); } + +/// Represent a contiguous range of values sharing the same sign. +struct SameSignRange { + bool HasZero; + bool HasNonZero; + bool HasInf; + // The lower and upper bounds of the range (inclusive).
+ // The sign is dropped and infinities are excluded. + std::optional<std::pair<APFloat, APFloat>> FinitePart; + + explicit SameSignRange(const APFloat &Lower, const APFloat &Upper) + : HasZero(Lower.isZero()), HasNonZero(!Upper.isZero()), + HasInf(Upper.isInfinity()) { + assert(!Lower.isNegative() && !Upper.isNegative() && + "The sign should be dropped."); + assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan && + "Empty set."); + if (!Lower.isInfinity()) + FinitePart = {Lower, + HasInf ? APFloat::getLargest(Lower.getSemantics()) : Upper}; + } +}; + +/// Split the range into positive and negative components. +static void splitPosNeg(const APFloat &Lower, const APFloat &Upper, + std::optional<SameSignRange> &NegPart, + std::optional<SameSignRange> &PosPart) { + assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan && + "Non-NaN part is empty."); + if (Lower.isNegative() == Upper.isNegative()) { + if (Lower.isNegative()) + NegPart = SameSignRange{abs(Upper), abs(Lower)}; + else + PosPart = SameSignRange{Lower, Upper}; + return; + } + auto &Sem = Lower.getSemantics(); + NegPart = SameSignRange{APFloat::getZero(Sem), abs(Lower)}; + PosPart = SameSignRange{APFloat::getZero(Sem), Upper}; +} + +ConstantFPRange ConstantFPRange::mul(const ConstantFPRange &Other) const { + auto &Sem = getSemantics(); + bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) || + ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet()); + if (isNaNOnly() || Other.isNaNOnly()) + return getNaNOnly(Sem, /*MayBeQNaN=*/ResMayBeQNaN, + /*MayBeSNaN=*/false); + std::optional<SameSignRange> LHSNeg, LHSPos, RHSNeg, RHSPos; + splitPosNeg(Lower, Upper, LHSNeg, LHSPos); + splitPosNeg(Other.Lower, Other.Upper, RHSNeg, RHSPos); + APFloat ResLower = APFloat::getInf(Sem, /*Negative=*/false); + APFloat ResUpper = APFloat::getInf(Sem, /*Negative=*/true); + auto Update = [&](std::optional<SameSignRange> &LHS, + std::optional<SameSignRange> &RHS, bool Negative) { + if (!LHS || !RHS) + return; + // 0 * inf = QNaN + ResMayBeQNaN |= LHS->HasZero && RHS->HasInf; + ResMayBeQNaN |= RHS->HasZero && LHS->HasInf; + // NonZero * inf = inf + if ((LHS->HasInf && RHS->HasNonZero) || (RHS->HasInf && LHS->HasNonZero)) + (Negative ? 
ResLower : ResUpper) = APFloat::getInf(Sem, Negative); + // Finite * Finite + if (LHS->FinitePart && RHS->FinitePart) { + APFloat NewLower = LHS->FinitePart->first * RHS->FinitePart->first; + APFloat NewUpper = LHS->FinitePart->second * RHS->FinitePart->second; + if (Negative) { + ResLower = minnum(ResLower, -NewUpper); + ResUpper = maxnum(ResUpper, -NewLower); + } else { + ResLower = minnum(ResLower, NewLower); + ResUpper = maxnum(ResUpper, NewUpper); + } + } + }; + Update(LHSNeg, RHSNeg, /*Negative=*/false); + Update(LHSNeg, RHSPos, /*Negative=*/true); + Update(LHSPos, RHSNeg, /*Negative=*/true); + Update(LHSPos, RHSPos, /*Negative=*/false); + return ConstantFPRange(ResLower, ResUpper, ResMayBeQNaN, /*MayBeSNaN=*/false); +} + +ConstantFPRange ConstantFPRange::div(const ConstantFPRange &Other) const { + auto &Sem = getSemantics(); + bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) || + ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet()); + if (isNaNOnly() || Other.isNaNOnly()) + return getNaNOnly(Sem, /*MayBeQNaN=*/ResMayBeQNaN, + /*MayBeSNaN=*/false); + std::optional<SameSignRange> LHSNeg, LHSPos, RHSNeg, RHSPos; + splitPosNeg(Lower, Upper, LHSNeg, LHSPos); + splitPosNeg(Other.Lower, Other.Upper, RHSNeg, RHSPos); + APFloat ResLower = APFloat::getInf(Sem, /*Negative=*/false); + APFloat ResUpper = APFloat::getInf(Sem, /*Negative=*/true); + auto Update = [&](std::optional<SameSignRange> &LHS, + std::optional<SameSignRange> &RHS, bool Negative) { + if (!LHS || !RHS) + return; + // inf / inf = QNaN, 0 / 0 = QNaN + ResMayBeQNaN |= LHS->HasInf && RHS->HasInf; + ResMayBeQNaN |= LHS->HasZero && RHS->HasZero; + // It is not straightforward to infer HasNonZeroFinite = HasFinite && + // HasNonZero. By definition we have: + // HasFinite = HasNonZeroFinite || HasZero + // HasNonZero = HasNonZeroFinite || HasInf + // Since the range is contiguous, if both HasFinite and HasNonZero are true, + // HasNonZeroFinite must be true. + bool LHSHasNonZeroFinite = LHS->FinitePart && LHS->HasNonZero; + bool RHSHasNonZeroFinite = RHS->FinitePart && RHS->HasNonZero; + // inf / Finite = inf, FiniteNonZero / 0 = inf + if ((LHS->HasInf && RHS->FinitePart) || + (LHSHasNonZeroFinite && RHS->HasZero)) + (Negative ? ResLower : ResUpper) = APFloat::getInf(Sem, Negative); + // Finite / inf = 0 + if (LHS->FinitePart && RHS->HasInf) { + APFloat Zero = APFloat::getZero(Sem, /*Negative=*/Negative); + ResLower = minnum(ResLower, Zero); + ResUpper = maxnum(ResUpper, Zero); + } + // Finite / FiniteNonZero + if (LHS->FinitePart && RHSHasNonZeroFinite) { + assert(!RHS->FinitePart->second.isZero() && + "Divisor should be non-zero."); + APFloat NewLower = LHS->FinitePart->first / RHS->FinitePart->second; + APFloat NewUpper = LHS->FinitePart->second / + (RHS->FinitePart->first.isZero() + ?
APFloat::getSmallest(Sem, /*Negative=*/false) + : RHS->FinitePart->first); + if (Negative) { + ResLower = minnum(ResLower, -NewUpper); + ResUpper = maxnum(ResUpper, -NewLower); + } else { + ResLower = minnum(ResLower, NewLower); + ResUpper = maxnum(ResUpper, NewUpper); + } + } + }; + Update(LHSNeg, RHSNeg, /*Negative=*/false); + Update(LHSNeg, RHSPos, /*Negative=*/true); + Update(LHSPos, RHSNeg, /*Negative=*/true); + Update(LHSPos, RHSPos, /*Negative=*/false); + return ConstantFPRange(ResLower, ResUpper, ResMayBeQNaN, /*MayBeSNaN=*/false); +} diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 614c3a9..15c0198 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -1002,6 +1003,18 @@ CallInst *IRBuilderBase::CreateConstrainedFPCall( return C; } +Value *IRBuilderBase::CreateSelectWithUnknownProfile(Value *C, Value *True, + Value *False, + StringRef PassName, + const Twine &Name) { + Value *Ret = CreateSelectFMF(C, True, False, {}, Name); + if (auto *SI = dyn_cast<SelectInst>(Ret)) { + setExplicitlyUnknownBranchWeightsIfProfiled( + *SI, *SI->getParent()->getParent(), PassName); + } + return Ret; +} + Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False, const Twine &Name, Instruction *MDFrom) { return CreateSelectFMF(C, True, False, {}, Name, MDFrom); } diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 2ea3a24..afce803 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -1363,9 +1363,12 @@ const Init *BinOpInit::Fold(const Record *CurRec) const { } case LISTSPLAT: { const auto *Value = dyn_cast<TypedInit>(LHS); - const auto *Size = dyn_cast<IntInit>(RHS); - if (Value && Size) { - SmallVector<const Init *, 8> Args(Size->getValue(), Value); + const auto *Count = dyn_cast<IntInit>(RHS); + if (Value && Count) { + if (Count->getValue() < 0) + PrintFatalError(Twine("!listsplat count ") + Count->getAsString() + + " is negative"); + SmallVector<const Init *, 8> Args(Count->getValue(), Value); return ListInit::get(Args, Value->getType()); } break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index dbe74b1..5700468 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -2394,15 +2394,19 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const { else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) && (TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) || TII->isTRANS(MI))) - Result = true; + Result = !MI.mayLoadOrStore(); else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) && - TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) - Result = true; + TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) { + // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS). + // For our purposes, these must not be classified as VALU, since treating + // them as VALU leads to unexpected behavior.
+ Result = !MI.mayLoadOrStore(); + } else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) && TII->isSALU(MI)) - Result = true; + Result = !MI.mayLoadOrStore(); else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) && TII->isMFMAorWMMA(MI)) diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 64e34db..5f6d742 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -260,8 +260,12 @@ class NSAHelper { } class MIMGNSAHelper<int num_addrs, - list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)> - : NSAHelper<> { + list<RegisterOperand> addr_types_in=[]> + : NSAHelper<> { + list<RegisterOperand> addr_types = + !if(!empty(addr_types_in), !listsplat(VGPROp_32, num_addrs), + addr_types_in); + list<string> AddrAsmNames = !foreach(i, !range(num_addrs), "vaddr" # i); let AddrIns = !dag(ins, addr_types, AddrAsmNames); let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]"; @@ -358,7 +362,7 @@ class MIMG_gfx11<int op, dag outs, string dns = ""> // Base class for all NSA MIMG instructions. // Note that 1-dword addresses always use non-NSA variants. class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="", - list<RegisterClass> addr_types=[], + list<RegisterOperand> addr_types=[], RegisterOperand LastAddrRC = VGPROp_32> : MIMG<outs, dns>, MIMGe_gfx11<op> { let SubtargetPredicate = isGFX11Only; @@ -378,7 +382,7 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="", } class VIMAGE_gfx12<int op, dag outs, int num_addrs, string dns="", - list<RegisterClass> addr_types=[]> + list<RegisterOperand> addr_types=[]> : VIMAGE<outs, dns>, VIMAGEe<op> { let SubtargetPredicate = isGFX12Plus; let AssemblerPredicate = isGFX12Plus; @@ -1521,12 +1525,12 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> { int VAddrDwords = !srl(Size, 5); int GFX11PlusNSAAddrs = !if(IsA16, 4, 5); - RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); - list<RegisterClass> GFX11PlusAddrTypes = - !cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32], - isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64], - IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96], - true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); + RegisterOperand node_ptr_type = !if(Is64, VGPROp_64, VGPROp_32); + list<RegisterOperand> GFX11PlusAddrTypes = + !cond(isBVH8 : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_32], + isDual : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_64], + IsA16 : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96], + true : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96, VGPROp_96]); } class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterOperand AddrRC> @@ -1552,7 +1556,7 @@ class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterOperand AddrRC> } class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs, - list<RegisterClass> addr_types> + list<RegisterOperand> addr_types> : MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "GFX11", addr_types> { let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16)); @@ -1561,7 +1565,7 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs, class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs, bit isDual, bit isBVH8, - list<RegisterClass> addr_types> + list<RegisterOperand> addr_types> : VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8), (outs 
VReg_320:$vdata, VReg_96:$ray_origin_out, VReg_96:$ray_dir_out), diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 4b54231..8851a0f 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1659,6 +1659,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return generateImmOutOfRangeError( Operands, ErrorInfo, -1, (1 << 5) - 1, "immediate must be non-zero in the range"); + case Match_InvalidXSfmmVType: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return generateXSfmmVTypeError(ErrorLoc); + } case Match_InvalidVTypeI: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); return generateVTypeError(ErrorLoc); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 70b7c43..e75dfe3 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -142,6 +142,22 @@ enum { ReadsPastVLShift = DestEEWShift + 2, ReadsPastVLMask = 1ULL << ReadsPastVLShift, + + // 0 -> Don't care about altfmt bit in VTYPE. + // 1 -> Is not altfmt. + // 2 -> Is altfmt(BF16). + AltFmtTypeShift = ReadsPastVLShift + 1, + AltFmtTypeMask = 3ULL << AltFmtTypeShift, + + // XSfmmbase + HasTWidenOpShift = AltFmtTypeShift + 2, + HasTWidenOpMask = 1ULL << HasTWidenOpShift, + + HasTMOpShift = HasTWidenOpShift + 1, + HasTMOpMask = 1ULL << HasTMOpShift, + + HasTKOpShift = HasTMOpShift + 1, + HasTKOpMask = 1ULL << HasTKOpShift, }; // Helper functions to read TSFlags. @@ -183,6 +199,11 @@ static inline bool hasRoundModeOp(uint64_t TSFlags) { return TSFlags & HasRoundModeOpMask; } +enum class AltFmtType { DontCare, NotAltFmt, AltFmt }; +static inline AltFmtType getAltFmtType(uint64_t TSFlags) { + return static_cast<AltFmtType>((TSFlags & AltFmtTypeMask) >> AltFmtTypeShift); +} + /// \returns true if this instruction uses vxrm static inline bool usesVXRM(uint64_t TSFlags) { return TSFlags & UsesVXRMMask; } @@ -204,11 +225,47 @@ static inline bool readsPastVL(uint64_t TSFlags) { return TSFlags & ReadsPastVLMask; } +// XSfmmbase +static inline bool hasTWidenOp(uint64_t TSFlags) { + return TSFlags & HasTWidenOpMask; +} + +static inline bool hasTMOp(uint64_t TSFlags) { return TSFlags & HasTMOpMask; } + +static inline bool hasTKOp(uint64_t TSFlags) { return TSFlags & HasTKOpMask; } + +static inline unsigned getTNOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + assert(hasTWidenOp(TSFlags) && hasVLOp(TSFlags)); + unsigned Offset = 3; + if (hasTKOp(TSFlags)) + Offset = 4; + return Desc.getNumOperands() - Offset; +} + +static inline unsigned getTMOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + assert(hasTWidenOp(TSFlags) && hasTMOp(TSFlags)); + if (hasTKOp(TSFlags)) + return Desc.getNumOperands() - 5; + // vtzero.t + return Desc.getNumOperands() - 4; +} + +static inline unsigned getTKOpNum(const MCInstrDesc &Desc) { + [[maybe_unused]] const uint64_t TSFlags = Desc.TSFlags; + assert(hasTWidenOp(TSFlags) && hasTKOp(TSFlags)); + return Desc.getNumOperands() - 3; +} + static inline unsigned getVLOpNum(const MCInstrDesc &Desc) { const uint64_t TSFlags = Desc.TSFlags; // This method is only called if we expect to have a VL operand, and all // instructions with VL also have SEW. 
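// For the XSfmm pseudos defined in RISCVInstrInfoXSfmm.td, the trailing operands are laid out as ..., atm, atn, atk, sew, twiden (matrix multiply pseudos) or ..., atn, sew, twiden (tile load/store/move pseudos), which is where the getNumOperands() - 3/4/5 offsets in the helpers above come from.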
assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags)); + // In Xsfmmbase, TN is an alias for VL, so here we use the same TSFlags bit. + if (hasTWidenOp(TSFlags)) + return getTNOpNum(Desc); unsigned Offset = 2; if (hasVecPolicyOp(TSFlags)) Offset = 3; return Desc.getNumOperands() - Offset; } @@ -226,7 +283,7 @@ static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) { const uint64_t TSFlags = Desc.TSFlags; assert(hasSEWOp(TSFlags)); unsigned Offset = 1; - if (hasVecPolicyOp(TSFlags)) + if (hasVecPolicyOp(TSFlags) || hasTWidenOp(TSFlags)) Offset = 2; return Desc.getNumOperands() - Offset; } @@ -243,6 +300,9 @@ static inline int getFRMOpNum(const MCInstrDesc &Desc) { if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags)) return -1; + if (hasTWidenOp(TSFlags) && hasTMOp(TSFlags)) + return getTMOpNum(Desc) - 1; + // The operand order // -------------------------------------- // | n-1 (if any) | n-2 | n-3 | n-4 | @@ -385,7 +445,9 @@ enum OperandType : unsigned { OPERAND_SEW_MASK, // Vector rounding mode for VXRM or FRM. OPERAND_VEC_RM, - OPERAND_LAST_RISCV_IMM = OPERAND_VEC_RM, + // Vtype operand for XSfmm extension. + OPERAND_XSFMM_VTYPE, + OPERAND_LAST_RISCV_IMM = OPERAND_XSFMM_VTYPE, // Operand is either a register or uimm5; this is used by V extension pseudo // instructions to represent a value that can be passed as AVL to either vsetvli // or vsetivli. diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index cf8d120..9ed3b97 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -168,10 +168,13 @@ struct DemandedFields { // If this is true, we demand that VTYPE is set to some legal state, i.e. that // vill is unset. bool VILL = false; + bool UseTWiden = false; + bool UseAltFmt = false; // Return true if any part of VTYPE was used bool usedVTYPE() const { - return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL; + return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL || + UseTWiden || UseAltFmt; } // Return true if any property of VL was used @@ -187,6 +190,8 @@ struct DemandedFields { TailPolicy = true; MaskPolicy = true; VILL = true; + UseTWiden = true; + UseAltFmt = true; } // Mark all VL properties as demanded @@ -212,6 +217,8 @@ struct DemandedFields { TailPolicy |= B.TailPolicy; MaskPolicy |= B.MaskPolicy; VILL |= B.VILL; + UseAltFmt |= B.UseAltFmt; + UseTWiden |= B.UseTWiden; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -258,7 +265,9 @@ struct DemandedFields { OS << "SEWLMULRatio=" << SEWLMULRatio << ", "; OS << "TailPolicy=" << TailPolicy << ", "; OS << "MaskPolicy=" << MaskPolicy << ", "; - OS << "VILL=" << VILL; + OS << "VILL=" << VILL << ", "; + OS << "UseAltFmt=" << UseAltFmt << ", "; + OS << "UseTWiden=" << UseTWiden; OS << "}"; } #endif @@ -328,6 +337,15 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) != RISCVVType::isMaskAgnostic(NewVType)) return false; + if (Used.UseTWiden && (RISCVVType::hasXSfmmWiden(CurVType) != + RISCVVType::hasXSfmmWiden(NewVType) || + (RISCVVType::hasXSfmmWiden(CurVType) && + RISCVVType::getXSfmmWiden(CurVType) != + RISCVVType::getXSfmmWiden(NewVType)))) + return false; + if (Used.UseAltFmt && + RISCVVType::isAltFmt(CurVType) != RISCVVType::isAltFmt(NewVType)) + return false; return true; } @@ -479,6 +497,11 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { Res.TailPolicy = false; } + Res.UseAltFmt =
RISCVII::getAltFmtType(MI.getDesc().TSFlags) != + RISCVII::AltFmtType::DontCare; + Res.UseTWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) || + RISCVInstrInfo::isXSfmmVectorConfigInstr(MI); + return Res; } @@ -510,6 +533,8 @@ class VSETVLIInfo { uint8_t TailAgnostic : 1; uint8_t MaskAgnostic : 1; uint8_t SEWLMULRatioOnly : 1; + uint8_t AltFmt : 1; + uint8_t TWiden : 3; public: VSETVLIInfo() @@ -586,6 +611,8 @@ public: RISCVVType::VLMUL getVLMUL() const { return VLMul; } bool getTailAgnostic() const { return TailAgnostic; } bool getMaskAgnostic() const { return MaskAgnostic; } + bool getAltFmt() const { return AltFmt; } + unsigned getTWiden() const { return TWiden; } bool hasNonZeroAVL(const LiveIntervals *LIS) const { if (hasAVLImm()) @@ -647,21 +674,31 @@ public: SEW = RISCVVType::getSEW(VType); TailAgnostic = RISCVVType::isTailAgnostic(VType); MaskAgnostic = RISCVVType::isMaskAgnostic(VType); + AltFmt = RISCVVType::isAltFmt(VType); + TWiden = + RISCVVType::hasXSfmmWiden(VType) ? RISCVVType::getXSfmmWiden(VType) : 0; } - void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) { + void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA, bool Altfmt, + unsigned W) { assert(isValid() && !isUnknown() && "Can't set VTYPE for uninitialized or unknown"); VLMul = L; SEW = S; TailAgnostic = TA; MaskAgnostic = MA; + AltFmt = Altfmt; + TWiden = W; } + void setAltFmt(bool AF) { AltFmt = AF; } + void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; } unsigned encodeVTYPE() const { assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && "Can't encode VTYPE for uninitialized or unknown"); + if (TWiden != 0) + return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt); return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); } @@ -674,9 +711,9 @@ public: "Can't compare VTYPE in unknown state"); assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && "Can't compare when only LMUL/SEW ratio is valid."); - return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) == + return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden) == std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic, - Other.MaskAgnostic); + Other.MaskAgnostic, Other.AltFmt, Other.TWiden); } unsigned getSEWLMULRatio() const { @@ -825,7 +862,9 @@ public: << "SEW=e" << (unsigned)SEW << ", " << "TailAgnostic=" << (bool)TailAgnostic << ", " << "MaskAgnostic=" << (bool)MaskAgnostic << ", " - << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}"; + << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << ", " + << "TWiden=" << (unsigned)TWiden << ", " + << "AltFmt=" << (bool)AltFmt << "}"; } #endif }; @@ -853,6 +892,11 @@ struct BlockData { BlockData() = default; }; +enum TKTMMode { + VSETTK = 0, + VSETTM = 1, +}; + class RISCVInsertVSETVLI : public MachineFunctionPass { const RISCVSubtarget *ST; const TargetInstrInfo *TII; @@ -908,6 +952,7 @@ private: VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const; VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const; void forwardVSETVLIAVL(VSETVLIInfo &Info) const; + bool insertVSETMTK(MachineBasicBlock &MBB, TKTMMode Mode) const; }; } // end anonymous namespace @@ -945,6 +990,18 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const { VSETVLIInfo NewInfo; if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { NewInfo.setAVLImm(MI.getOperand(1).getImm()); + } else if (RISCVInstrInfo::isXSfmmVectorConfigTNInstr(MI)) { + assert(MI.getOpcode() == RISCV::PseudoSF_VSETTNT || + MI.getOpcode() == RISCV::PseudoSF_VSETTNTX0); + switch 
(MI.getOpcode()) { + case RISCV::PseudoSF_VSETTNTX0: + NewInfo.setAVLVLMAX(); + break; + case RISCV::PseudoSF_VSETTNT: + Register ATNReg = MI.getOperand(1).getReg(); + NewInfo.setAVLRegDef(getVNInfoFromReg(ATNReg, MI, LIS), ATNReg); + break; + } } else { assert(MI.getOpcode() == RISCV::PseudoVSETVLI || MI.getOpcode() == RISCV::PseudoVSETVLIX0); @@ -1005,11 +1062,34 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags); + bool AltFmt = RISCVII::getAltFmtType(TSFlags) == RISCVII::AltFmtType::AltFmt; + InstrInfo.setAltFmt(AltFmt); + unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); // A Log2SEW of 0 is an operation on mask registers only. unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); + if (RISCVII::hasTWidenOp(TSFlags)) { + const MachineOperand &TWidenOp = + MI.getOperand(MI.getNumExplicitOperands() - 1); + unsigned TWiden = TWidenOp.getImm(); + + InstrInfo.setAVLVLMAX(); + if (RISCVII::hasVLOp(TSFlags)) { + const MachineOperand &TNOp = + MI.getOperand(RISCVII::getTNOpNum(MI.getDesc())); + + if (TNOp.getReg().isVirtual()) + InstrInfo.setAVLRegDef(getVNInfoFromReg(TNOp.getReg(), MI, LIS), + TNOp.getReg()); + } + + InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden); + + return InstrInfo; + } + if (RISCVII::hasVLOp(TSFlags)) { const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); if (VLOp.isImm()) { @@ -1045,7 +1125,9 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { assert(SEW == EEW && "Initial SEW doesn't match expected EEW"); } #endif - InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); + // TODO: Propagate the twiden from previous vtype for potential reuse. + InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, + /*TWiden*/ 0); forwardVSETVLIAVL(InstrInfo); @@ -1053,10 +1135,33 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { } void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertPt, DebugLoc DL, - const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { - + MachineBasicBlock::iterator InsertPt, + DebugLoc DL, const VSETVLIInfo &Info, + const VSETVLIInfo &PrevInfo) { ++NumInsertedVSETVL; + + if (Info.getTWiden()) { + if (Info.hasAVLVLMAX()) { + Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNTX0)) + .addReg(DestReg, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill) + .addImm(Info.encodeVTYPE()); + if (LIS) { + LIS->InsertMachineInstrInMaps(*MI); + LIS->createAndComputeVirtRegInterval(DestReg); + } + } else { + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNT)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(Info.getAVLReg()) + .addImm(Info.encodeVTYPE()); + if (LIS) + LIS->InsertMachineInstrInMaps(*MI); + } + return; + } + if (PrevInfo.isValid() && !PrevInfo.isUnknown()) { // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same // VLMAX. @@ -1198,7 +1303,8 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, // be coalesced into another vsetvli since we won't demand any fields. 
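// The canonical state chosen below also clears the new XSfmm fields (AltFmt = false, TWiden = 0), so it can never demand an XSfmm configuration.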
VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly NewInfo.setAVLImm(1); - NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true); + NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true, + /*AltFmt*/ false, /*W*/ 0); Info = NewInfo; return; } @@ -1240,7 +1346,9 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() || IncomingInfo.getTailAgnostic(), (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() || - IncomingInfo.getMaskAgnostic()); + IncomingInfo.getMaskAgnostic(), + (Demanded.UseAltFmt ? IncomingInfo : Info).getAltFmt(), + Demanded.UseTWiden ? IncomingInfo.getTWiden() : 0); // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep // the AVL. @@ -1293,7 +1401,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB, if (RISCVInstrInfo::isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) || - isVectorCopy(ST->getRegisterInfo(), MI)) + isVectorCopy(ST->getRegisterInfo(), MI) || + RISCVInstrInfo::isXSfmmVectorConfigInstr(MI)) HadVectorOp = true; transferAfter(Info, MI); @@ -1675,6 +1784,12 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { }; for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) { + // TODO: Support XSfmm. + if (RISCVII::hasTWidenOp(MI.getDesc().TSFlags) || + RISCVInstrInfo::isXSfmmVectorConfigInstr(MI)) { + NextMI = nullptr; + continue; + } if (!RISCVInstrInfo::isVectorConfigInstr(MI)) { Used.doUnion(getDemanded(MI, ST)); @@ -1788,6 +1903,65 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { } } +bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB, + TKTMMode Mode) const { + + bool Changed = false; + for (auto &MI : MBB) { + uint64_t TSFlags = MI.getDesc().TSFlags; + if (RISCVInstrInfo::isXSfmmVectorConfigTMTKInstr(MI) || + !RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasTWidenOp(TSFlags)) + continue; + + VSETVLIInfo CurrInfo = computeInfoForInstr(MI); + + if (Mode == VSETTK && !RISCVII::hasTKOp(TSFlags)) + continue; + + if (Mode == VSETTM && !RISCVII::hasTMOp(TSFlags)) + continue; + + unsigned OpNum = 0; + unsigned Opcode = 0; + switch (Mode) { + case VSETTK: + OpNum = RISCVII::getTKOpNum(MI.getDesc()); + Opcode = RISCV::PseudoSF_VSETTK; + break; + case VSETTM: + OpNum = RISCVII::getTMOpNum(MI.getDesc()); + Opcode = RISCV::PseudoSF_VSETTM; + break; + } + + assert(OpNum && Opcode && "Invalid OpNum or Opcode"); + + MachineOperand &Op = MI.getOperand(OpNum); + + auto TmpMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opcode)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(Op.getReg()) + .addImm(Log2_32(CurrInfo.getSEW())) + .addImm(Log2_32(CurrInfo.getTWiden()) + 1); + + Changed = true; + Register Reg = Op.getReg(); + Op.setReg(Register()); + Op.setIsKill(false); + if (LIS) { + LIS->InsertMachineInstrInMaps(*TmpMI); + LiveInterval &LI = LIS->getInterval(Reg); + + // Erase the AVL operand from the instruction. + LIS->shrinkToUses(&LI); + // TODO: Enable this once needVSETVLIPHI is supported. + // SmallVector<LiveInterval *> SplitLIs; + // LIS->splitSeparateComponents(LI, SplitLIs); + } + } + return Changed; +} + bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. 
ST = &MF.getSubtarget<RISCVSubtarget>(); @@ -1865,6 +2039,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) insertReadVL(MBB); + for (MachineBasicBlock &MBB : MF) { + insertVSETMTK(MBB, VSETTM); + insertVSETMTK(MBB, VSETTK); + } + BlockInfo.clear(); return HaveVectorOp; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 2afd77a..5b06303 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -267,6 +267,22 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr, // operands' VLs. bit ReadsPastVL = 0; let TSFlags{26} = ReadsPastVL; + + // 0 -> Don't care about altfmt bit in VTYPE. + // 1 -> Is not altfmt. + // 2 -> Is altfmt(BF16). + bits<2> AltFmtType = 0; + let TSFlags{28-27} = AltFmtType; + + // XSfmmbase + bit HasTWidenOp = 0; + let TSFlags{29} = HasTWidenOp; + + bit HasTmOp = 0; + let TSFlags{30} = HasTmOp; + + bit HasTkOp = 0; + let TSFlags{31} = HasTkOp; } class RVInst<dag outs, dag ins, string opcodestr, string argstr, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 96e1078..ddb53a2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3005,6 +3005,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, else Ok = RISCVFPRndMode::isValidRoundingMode(Imm); break; + case RISCVOp::OPERAND_XSFMM_VTYPE: + Ok = RISCVVType::isValidXSfmmVType(Imm); + break; } if (!Ok) { ErrInfo = "Invalid immediate"; @@ -3670,6 +3673,11 @@ std::string RISCVInstrInfo::createMIROperandComment( RISCVVType::printVType(Imm, OS); break; } + case RISCVOp::OPERAND_XSFMM_VTYPE: { + unsigned Imm = Op.getImm(); + RISCVVType::printXSfmmVType(Imm, OS); + break; + } case RISCVOp::OPERAND_SEW: case RISCVOp::OPERAND_SEW_MASK: { unsigned Log2SEW = Op.getImm(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 298d35a..c1b23af 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -128,6 +128,9 @@ defvar TAIL_AGNOSTIC = 1; defvar TU_MU = 0; defvar TA_MU = 1; defvar TA_MA = 3; +defvar DONT_CARE_ALTFMT = 0; +defvar IS_NOT_ALTFMT = 1; +defvar IS_ALTFMT = 2; //===----------------------------------------------------------------------===// // Utilities. @@ -159,7 +162,8 @@ class PseudoToVInst<string PseudoInst> { ["_M4", ""], ["_M8", ""], ["_SE", ""], - ["_RM", ""] + ["_RM", ""], + ["_ALT", ""] ]; string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst, !subst(AffixSubst[0], AffixSubst[1], Acc)); @@ -6396,7 +6400,7 @@ let Defs = [VXSAT] in { // 13. Vector Floating-Point Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { //===----------------------------------------------------------------------===// // 13.2. 
Vector Single-Width Floating-Point Add/Subtract Instructions //===----------------------------------------------------------------------===// @@ -6565,7 +6569,7 @@ defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM; defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W; } // mayRaiseFPException = true -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 14. Vector Reduction Operations @@ -6593,7 +6597,7 @@ defm PseudoVWREDSUM : VPseudoVWRED_VS; } } // Predicates = [HasVInstructions] -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { //===----------------------------------------------------------------------===// // 14.3. Vector Single-Width Floating-Point Reduction Instructions //===----------------------------------------------------------------------===// @@ -6612,7 +6616,7 @@ defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM; defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM; } -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 15. Vector Mask Instructions @@ -6703,7 +6707,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { // 16.2. Floating-Point Scalar Move Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { foreach f = FPList in { let HasSEWOp = 1, BaseInstr = VFMV_F_S in @@ -6718,7 +6722,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>; } } -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 16.3. Vector Slide Instructions @@ -6730,10 +6734,10 @@ let Predicates = [HasVInstructions] in { defm PseudoVSLIDE1DOWN : VPseudoVSLD1_VX; } // Predicates = [HasVInstructions] -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { defm PseudoVFSLIDE1UP : VPseudoVSLD1_VF<"@earlyclobber $rd">; defm PseudoVFSLIDE1DOWN : VPseudoVSLD1_VF; -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 16.4. 
Vector Register Gather Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 557d873..6a4119a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -438,8 +438,10 @@ let Predicates = [HasVendorXSfvcp] in { } foreach f = FPList in { foreach m = f.MxList in { - defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>; - defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>; + let AltFmtType = IS_NOT_ALTFMT in { + defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>; + defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>; + } } } foreach m = MxListW in { @@ -449,7 +451,8 @@ let Predicates = [HasVendorXSfvcp] in { } foreach f = FPListW in { foreach m = f.MxList in - defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>; + let AltFmtType = IS_NOT_ALTFMT in + defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td index a5ee701..5ad22e6b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td @@ -225,7 +225,7 @@ let Predicates = [HasVendorXSfmmbase] in { def SF_VSETTM : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00001, "sf.vsettm", "$rd, $rs1">; def SF_VSETTK : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00010, - "sf.vsettk", "$rd, $rs1">; + "sf.vsettk", "$rd, $rs1">; def SF_VTDISCARD : SFInstVtDiscard<"sf.vtdiscard">; def SF_VTMV_V_T : SFInstTileMoveOp<0b010000, (outs VR:$vd), (ins GPR:$rs1), @@ -277,3 +277,144 @@ let Uses = [FRM], mayRaiseFPException = true in { } // Predicates = [HasVendorXSfmm32a8f] } // DecoderNamespace = "XSfvector" + +class VPseudoSF_VTileLoad + : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 1; + let mayStore = 0; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_VTileStore + : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 1; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_VTileMove_V_T + : RISCVVPseudo<(outs VRM8:$vd), (ins GPR:$rs1, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 0; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_VTileMove_T_V + : RISCVVPseudo<(outs), (ins GPR:$rs1, VRM8:$vs2, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 0; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_MatMul<RegisterClass mtd_class> + : RISCVVPseudo<(outs), + (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, AVL:$atm, AVL:$atn, + AVL:$atk, ixlenimm:$sew, ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 0; + let HasTmOp = 1; + let HasVLOp = 1; // Tn + let HasTkOp = 1; + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_MatMul_FRM<RegisterClass mtd_class> + : RISCVVPseudo<(outs), + (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, ixlenimm:$frm, + AVL:$atm, AVL:$atn, AVL:$atk, ixlenimm:$sew, + ixlenimm:$twiden), []> { + let mayLoad = 0; + let mayStore = 0; + let HasTmOp = 1; + let HasVLOp = 1; // Tn + let HasTkOp = 1; + let HasSEWOp = 1; + let HasRoundModeOp = 1; 
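+  // Carries a static rounding-mode operand, presumably wired up the same way as the existing RVV _RM pseudos through the post-isel hook enabled below.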
+ let hasPostISelHook = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +let Defs = [VL, VTYPE] in { + def PseudoSF_VSETTNT + : Pseudo<(outs GPR:$rd), + (ins GPRNoX0:$rs1, XSfmmVTypeOp:$vtypei), []>, + PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + def PseudoSF_VSETTNTX0 + : Pseudo<(outs GPRNoX0:$rd), + (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>, + PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + def PseudoSF_VSETTNTX0X0 + : Pseudo<(outs GPRX0:$rd), + (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>, + PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; +} + +let Defs = [VTYPE], Uses = [VTYPE], HasTWidenOp = 1, HasSEWOp = 1 in { + def PseudoSF_VSETTM + : Pseudo<(outs GPR:$rd), + (ins GPR:$rs1, ixlenimm:$log2sew, ixlenimm:$twiden), []>, + PseudoInstExpansion<(SF_VSETTM GPR:$rd, GPR:$rs1)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + def PseudoSF_VSETTK + : Pseudo<(outs GPR:$rd), + (ins GPR:$rs1, ixlenimm:$log2sew, ixlenimm:$twiden), []>, + PseudoInstExpansion<(SF_VSETTK GPR:$rd, GPR:$rs1)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; +} +} + +foreach eew = [8, 16, 32, 64] in { + def PseudoSF_VLTE # eew : VPseudoSF_VTileLoad; + def PseudoSF_VSTE # eew : VPseudoSF_VTileStore; +} + +def PseudoSF_VTMV_T_V : VPseudoSF_VTileMove_T_V; +def PseudoSF_VTMV_V_T : VPseudoSF_VTileMove_V_T; + +foreach a = I8Encodes in + foreach b = I8Encodes in + def PseudoSF_MM_ # !toupper(a.Name) # _ # !toupper(b.Name) + : VPseudoSF_MatMul<TRM4>; + +let AltFmtType = IS_NOT_ALTFMT in + def PseudoSF_MM_F_F : VPseudoSF_MatMul_FRM<TRM2>; +let AltFmtType = IS_ALTFMT in + def PseudoSF_MM_F_F_ALT : VPseudoSF_MatMul_FRM<TRM2>; + +foreach e1 = [5, 4] in + foreach e2 = [5, 4] in + def PseudoSF_MM_E # e1 # M # !sub(7, e1) # _E # e2 # M # !sub(7, e2) + : VPseudoSF_MatMul_FRM<TRM4>; + +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { + let HasVLOp = 1, HasTmOp = 1, HasTWidenOp = 1, HasSEWOp = 1 in + def PseudoSF_VTZERO_T + : RISCVVPseudo<(outs), + (ins TR:$rd, AVL:$atm, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)>; + def PseudoSF_VTDISCARD : RISCVVPseudo<(outs), (ins), []>; +} diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td index 3658817..dcae977 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td +++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td @@ -78,7 +78,41 @@ def isVectorConfigInstr PseudoVSETVLI, PseudoVSETVLIX0, PseudoVSETVLIX0X0, - PseudoVSETIVLI + PseudoVSETIVLI, + PseudoSF_VSETTNT, + PseudoSF_VSETTNTX0, + PseudoSF_VSETTNTX0X0 + ]>>>; + +// Returns true if this is a PseudoSF_VSETTNT* instruction. +def isXSfmmVectorConfigTNInstr + : TIIPredicate<"isXSfmmVectorConfigTNInstr", + MCReturnStatement< + CheckOpcode<[ + PseudoSF_VSETTNT, + PseudoSF_VSETTNTX0, + PseudoSF_VSETTNTX0X0 + ]>>>; + +// Returns true if this is PseudoSF_VSETTM or PseudoSF_VSETTK. +def isXSfmmVectorConfigTMTKInstr + : TIIPredicate<"isXSfmmVectorConfigTMTKInstr", + MCReturnStatement< + CheckOpcode<[ + PseudoSF_VSETTM, + PseudoSF_VSETTK + ]>>>; + +// Returns true if this is an XSfmm vector configuration instruction.
+def isXSfmmVectorConfigInstr + : TIIPredicate<"isXSfmmVectorConfigInstr", + MCReturnStatement< + CheckOpcode<[ + PseudoSF_VSETTNT, + PseudoSF_VSETTNTX0, + PseudoSF_VSETTNTX0X0, + PseudoSF_VSETTM, + PseudoSF_VSETTK ]>>>; // Return true if this is 'vsetvli x0, x0, vtype' which preserves diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 40b6416..e9f43b9 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -178,6 +178,10 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Shadow stack pointer. markSuperRegs(Reserved, RISCV::SSP); + // XSfmmbase + for (MCPhysReg Reg = RISCV::T0; Reg <= RISCV::T15; Reg++) + markSuperRegs(Reserved, Reg); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 6472334..47c24fc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -317,6 +317,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom); } + if (Subtarget->hasFP16()) { + setOperationAction(ISD::FMA, MVT::v8f16, Legal); + } + + if (Subtarget->hasRelaxedSIMD()) { + setOperationAction(ISD::FMULADD, MVT::v4f32, Legal); + setOperationAction(ISD::FMULADD, MVT::v2f64, Legal); + } + // Partial MLA reductions. for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) { setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal); @@ -1120,6 +1129,18 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const { return TargetLoweringBase::getPreferredVectorAction(VT); } +bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd( + const MachineFunction &MF, EVT VT) const { + if (!Subtarget->hasFP16() || !VT.isVector()) + return false; + + EVT ScalarVT = VT.getScalarType(); + if (!ScalarVT.isSimple()) + return false; + + return ScalarVT.getSimpleVT().SimpleTy == MVT::f16; +} + bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts( SDValue Op, const TargetLoweringOpt &TLO) const { // ISel process runs DAGCombiner after legalization; this step is called diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index b33a853..472ec67 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -81,6 +81,8 @@ private: TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 49af78b..0f6e1ca 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1213,6 +1213,27 @@ defm EXTMUL_LOW_U : defm EXTMUL_HIGH_U : SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>; +// Pattern for i32x4.dot_i16x8_s +def : Pat< + (v4i32 (add + (wasm_shuffle + (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)), + (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)), + (i32 0), (i32 1), (i32 2), (i32 3), + (i32 8), (i32 9), (i32 10), (i32 11), + (i32 16), 
(i32 17), (i32 18), (i32 19), + (i32 24), (i32 25), (i32 26), (i32 27)), + (wasm_shuffle + (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)), + (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)), + (i32 4), (i32 5), (i32 6), (i32 7), + (i32 12), (i32 13), (i32 14), (i32 15), + (i32 20), (i32 21), (i32 22), (i32 23), + (i32 28), (i32 29), (i32 30), (i32 31))) + ), + (v4i32 (DOT v8i16:$lhs, v8i16:$rhs)) +>; + //===----------------------------------------------------------------------===// // Floating-point unary arithmetic //===----------------------------------------------------------------------===// @@ -1626,7 +1647,8 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero, // Relaxed (Negative) Multiply-Add (madd/nmadd) //===----------------------------------------------------------------------===// -multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> { +multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, + list<Predicate> reqs> { defm MADD_#vec : SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec.vt V128:$dst), (int_wasm_relaxed_madd @@ -1640,16 +1662,46 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c", vec.prefix#".relaxed_nmadd", simdopS, reqs>; - def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))), - (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>; + def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)), + (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; + def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)), + (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; - def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))), - (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>; + def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))), + (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; + def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)), + (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; } -defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>; -defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>; -defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>; +defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>; +defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>; + +//===----------------------------------------------------------------------===// +// FP16 (Negative) Multiply-Add (madd/nmadd) +//===----------------------------------------------------------------------===// + +multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, + list<Predicate> reqs> { + defm MADD_#vec : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), (fma + (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".madd\t$dst, $a, $b, $c", + vec.prefix#".madd", simdopA, reqs>; + defm NMADD_#vec : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), (fma + (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".nmadd\t$dst, $a, $b, $c", + 
vec.prefix#".nmadd", simdopS, reqs>; +} +defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>; + +// TODO: I think separate intrinsics should be introduced for these FP16 operations. +def : Pat<(v8f16 (int_wasm_relaxed_madd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))), + (MADD_F16x8 V128:$a, V128:$b, V128:$c)>; +def : Pat<(v8f16 (int_wasm_relaxed_nmadd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))), + (NMADD_F16x8 V128:$a, V128:$b, V128:$c)>; //===----------------------------------------------------------------------===// // Laneselect diff --git a/llvm/lib/TargetParser/RISCVTargetParser.cpp b/llvm/lib/TargetParser/RISCVTargetParser.cpp index acf8e4c..5ea63a9 100644 --- a/llvm/lib/TargetParser/RISCVTargetParser.cpp +++ b/llvm/lib/TargetParser/RISCVTargetParser.cpp @@ -228,6 +228,10 @@ void printVType(unsigned VType, raw_ostream &OS) { OS << ", mu"; } +void printXSfmmVType(unsigned VType, raw_ostream &OS) { + OS << "e" << getSEW(VType) << ", w" << getXSfmmWiden(VType); +} + unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul) { unsigned LMul; bool Fractional; diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h index 26ec4f3..e05fe28 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCloner.h +++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h @@ -1,3 +1,4 @@ +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -19,9 +20,7 @@ #include "llvm/Transforms/Coroutines/CoroInstr.h" #include "llvm/Transforms/Utils/ValueMapper.h" -namespace llvm { - -namespace coro { +namespace llvm::coro { enum class CloneKind { /// The shared resume function for a switch lowering. @@ -149,8 +148,6 @@ public: } }; -} // end namespace coro - -} // end namespace llvm +} // end namespace llvm::coro #endif // LLVM_LIB_TRANSFORMS_COROUTINES_COROCLONER_H diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 471b9eb..cdb5852 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -38,7 +38,7 @@ public: AnyResumeFnPtrTy(PointerType::getUnqual(Context)) {} void lowerEarlyIntrinsics(Function &F); }; -} +} // namespace // Replace a direct call to coro.resume or coro.destroy with an indirect call to // an address returned by coro.subfn.addr intrinsic. This is done so that diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 52f4ffe..cc47a55 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -16,11 +16,7 @@ #include "llvm/Transforms/Coroutines/CoroInstr.h" #include "llvm/Transforms/Coroutines/CoroShape.h" -namespace llvm { - -class CallGraph; - -namespace coro { +namespace llvm::coro { bool isSuspendBlock(BasicBlock *BB); bool declaresAnyIntrinsic(const Module &M); @@ -61,7 +57,6 @@ void normalizeCoroutine(Function &F, coro::Shape &Shape, CallInst *createMustTailCall(DebugLoc Loc, Function *MustTailCallFn, TargetTransformInfo &TTI, ArrayRef<Value *> Arguments, IRBuilder<> &); -} // End namespace coro. 
-} // End namespace llvm +} // End namespace llvm::coro #endif diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp index 6aaabca..f2444da 100644 --- a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp @@ -137,8 +137,7 @@ struct RematGraph { } // namespace -namespace llvm { -template <> struct GraphTraits<RematGraph *> { +template <> struct llvm::GraphTraits<RematGraph *> { using NodeRef = RematGraph::RematNode *; using ChildIteratorType = RematGraph::RematNode **; @@ -149,8 +148,6 @@ template <> struct GraphTraits<RematGraph *> { static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); } }; -} // end namespace llvm - // For each instruction identified as materializable across the suspend point, // and its associated DAG of other rematerializable instructions, // recreate the DAG of instructions after the suspend point. diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp index e474c07..81fe0c9 100644 --- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp @@ -16,11 +16,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -namespace llvm { - -namespace coro { - -namespace { +using namespace llvm; +using namespace llvm::coro; typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet; @@ -71,7 +68,7 @@ static bool isLocalAlloca(CoroAllocaAllocInst *AI) { /// This happens during the all-instructions iteration, so it must not /// delete the call. static Instruction * -lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const coro::Shape &Shape, +lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const Shape &Shape, SmallVectorImpl<Instruction *> &DeadInsts) { IRBuilder<> Builder(AI); auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); @@ -450,10 +447,8 @@ static void collectFrameAlloca(AllocaInst *AI, const coro::Shape &Shape, Visitor.getMayWriteBeforeCoroBegin()); } -} // namespace - -void collectSpillsFromArgs(SpillInfo &Spills, Function &F, - const SuspendCrossingInfo &Checker) { +void coro::collectSpillsFromArgs(SpillInfo &Spills, Function &F, + const SuspendCrossingInfo &Checker) { // Collect the spills for arguments and other not-materializable values. for (Argument &A : F.args()) for (User *U : A.users()) @@ -461,7 +456,7 @@ void collectSpillsFromArgs(SpillInfo &Spills, Function &F, Spills[&A].push_back(cast<Instruction>(U)); } -void collectSpillsAndAllocasFromInsts( +void coro::collectSpillsAndAllocasFromInsts( SpillInfo &Spills, SmallVector<AllocaInfo, 8> &Allocas, SmallVector<Instruction *, 4> &DeadInstructions, SmallVector<CoroAllocaAllocInst *, 4> &LocalAllocas, Function &F, @@ -516,8 +511,8 @@ void collectSpillsAndAllocasFromInsts( } } -void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, - const SuspendCrossingInfo &Checker) { +void coro::collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, + const SuspendCrossingInfo &Checker) { // We don't want the layout of coroutine frame to be affected // by debug information. So we only choose to salvage dbg.values for // whose value is already in the frame. @@ -535,10 +530,9 @@ void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, /// Async and Retcon{Once} conventions assume that all spill uses can be sunk /// after the coro.begin intrinsic. 
-void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom, - CoroBeginInst *CoroBegin, - coro::SpillInfo &Spills, - SmallVectorImpl<coro::AllocaInfo> &Allocas) { +void coro::sinkSpillUsesAfterCoroBegin( + const DominatorTree &Dom, CoroBeginInst *CoroBegin, coro::SpillInfo &Spills, + SmallVectorImpl<coro::AllocaInfo> &Allocas) { SmallSetVector<Instruction *, 32> ToMove; SmallVector<Instruction *, 32> Worklist; @@ -582,8 +576,9 @@ void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom, Inst->moveBefore(InsertPt->getIterator()); } -BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def, - const DominatorTree &DT) { +BasicBlock::iterator coro::getSpillInsertionPt(const coro::Shape &Shape, + Value *Def, + const DominatorTree &DT) { BasicBlock::iterator InsertPt; if (auto *Arg = dyn_cast<Argument>(Def)) { // For arguments, we will place the store instruction right after @@ -625,7 +620,3 @@ BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def, return InsertPt; } - -} // End namespace coro. - -} // End namespace llvm. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 7071876..943c223 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -471,7 +471,6 @@ private: Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable, unsigned Depth = 0); -public: /// Create `select C, S1, S2`. Use only when the profile cannot be calculated /// from existing profile metadata: if the Function has profiles, this will /// set the profile of this select to "unknown". @@ -484,6 +483,7 @@ public: return Sel; } +public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. void CreateNonTerminatorUnreachable(Instruction *InsertAt) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 63e24a0..a330bb7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -110,8 +110,8 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I, ShrAmt->getName() + ".z"); // There is no existing !prof metadata from which we can derive the !prof // metadata for this select.
- Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper); - IC.Builder.Insert(Select); + Value *Select = IC.Builder.CreateSelectWithUnknownProfile(ShrAmtZ, Lower, + Upper, DEBUG_TYPE); Select->takeName(I); return Select; } diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index c86092b..a6ec6c1 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" @@ -194,6 +195,30 @@ static bool isAllocationWithHotColdVariant(const Function *Callee, } } +static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar, + AnnotationKind Kind) { + assert(Kind != llvm::memprof::AnnotationKind::AnnotationOK && + "Should not handle AnnotationOK here"); + SmallString<32> Reason; + switch (Kind) { + case llvm::memprof::AnnotationKind::ExplicitSection: + ++NumOfMemProfExplicitSectionGlobalVars; + Reason.append("explicit section name"); + break; + case llvm::memprof::AnnotationKind::DeclForLinker: + Reason.append("linker declaration"); + break; + case llvm::memprof::AnnotationKind::ReservedName: + Reason.append("name starts with `llvm.`"); + break; + default: + llvm_unreachable("Unexpected annotation kind"); + } + LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to " + << Reason << ".\n"); + return; +} + struct AllocMatchInfo { uint64_t TotalSize = 0; AllocationType AllocType = AllocationType::None; @@ -775,29 +800,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::none(); } -// Returns true iff the global variable has custom section either by -// __attribute__((section("name"))) -// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate) -// or #pragma clang section directives -// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section). -static bool hasExplicitSectionName(const GlobalVariable &GVar) { - if (GVar.hasSection()) - return true; - - auto Attrs = GVar.getAttributes(); - if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") || - Attrs.hasAttribute("relro-section") || - Attrs.hasAttribute("rodata-section")) - return true; - return false; -} - bool MemProfUsePass::annotateGlobalVariables( Module &M, const memprof::DataAccessProfData *DataAccessProf) { if (!AnnotateStaticDataSectionPrefix || M.globals().empty()) return false; if (!DataAccessProf) { + M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U); M.getContext().diagnose(DiagnosticInfoPGOProfile( MemoryProfileFileName.data(), StringRef("Data access profiles not found in memprof. 
Ignore " @@ -805,6 +814,7 @@ bool MemProfUsePass::annotateGlobalVariables( DS_Warning)); return false; } + M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U); bool Changed = false; // Iterate all global variables in the module and annotate them based on @@ -815,13 +825,9 @@ bool MemProfUsePass::annotateGlobalVariables( for (GlobalVariable &GVar : M.globals()) { assert(!GVar.getSectionPrefix().has_value() && "GVar shouldn't have section prefix yet"); - if (GVar.isDeclarationForLinker()) - continue; - - if (hasExplicitSectionName(GVar)) { - ++NumOfMemProfExplicitSectionGlobalVars; - LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName() - << " has explicit section name. Skip annotating.\n"); + auto Kind = llvm::memprof::getAnnotationKind(GVar); + if (Kind != llvm::memprof::AnnotationKind::AnnotationOK) { + HandleUnsupportedAnnotationKinds(GVar, Kind); continue; } @@ -831,7 +837,6 @@ bool MemProfUsePass::annotateGlobalVariables( // TODO: Track string content hash in the profiles and compute it inside the // compiler to categorize the hotness of string literals. if (Name.starts_with(".str")) { - LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n"); continue; } diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp index ccb86eb..fb39fdd 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -269,8 +269,7 @@ static bool replaceIfIdentical(PHINode &PHI, PHINode &ReplPHI) { bool EliminateNewDuplicatePHINodes(BasicBlock *BB, BasicBlock::phi_iterator FirstExistingPN) { - auto NewPHIs = make_range(BB->phis().begin(), FirstExistingPN); - assert(!PHIAreRefEachOther(NewPHIs)); + assert(!PHIAreRefEachOther(make_range(BB->phis().begin(), FirstExistingPN))); // Deduplicate new PHIs first to reduce the number of comparisons on the // following new -> existing pass.
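Both the InstCombineSimplifyDemanded hunk above and the SLPVectorizer hunk below are part of the same migration: selects synthesized by a transform carry no meaningful branch weights, so they are now created via IRBuilder's CreateSelectWithUnknownProfile, which marks the select's profile as unknown and records the DEBUG_TYPE of the pass that made it. A minimal sketch of the call shape, inferred solely from the call sites visible in this patch (the wrapper function and pass name here are hypothetical):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    #define DEBUG_TYPE "my-transform" // hypothetical pass name

    // Clamps X to be non-negative via a synthesized select. No real profile
    // describes the two arms of this select, so it is created with explicitly
    // unknown branch weights attributed to DEBUG_TYPE instead of fabricated
    // !prof metadata.
    static Value *clampToZero(IRBuilderBase &B, Value *X) {
      Value *IsNeg = B.CreateICmpSLT(X, ConstantInt::get(X->getType(), 0));
      return B.CreateSelectWithUnknownProfile(
          IsNeg, ConstantInt::get(X->getType(), 0), X, DEBUG_TYPE, "clamped");
    }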
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f95d288..88af2cf 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -19460,7 +19460,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } assert(getNumElements(Cond->getType()) == TrueNumElements && "Cannot vectorize Instruction::Select"); - Value *V = Builder.CreateSelect(Cond, True, False); + Value *V = + Builder.CreateSelectWithUnknownProfile(Cond, True, False, DEBUG_TYPE); V = FinalShuffle(V, E); E->VectorizedValue = V; @@ -23580,18 +23581,19 @@ class HorizontalReduction { switch (Kind) { case RecurKind::Or: { if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy)) - return Builder.CreateSelect( + return Builder.CreateSelectWithUnknownProfile( LHS, ConstantInt::getAllOnesValue(CmpInst::makeCmpResultType(OpTy)), - RHS, Name); + RHS, DEBUG_TYPE, Name); unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); } case RecurKind::And: { if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy)) - return Builder.CreateSelect( + return Builder.CreateSelectWithUnknownProfile( LHS, RHS, - ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), Name); + ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), + DEBUG_TYPE, Name); unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); @@ -23612,7 +23614,8 @@ class HorizontalReduction { if (UseSelect) { CmpInst::Predicate Pred = llvm::getMinMaxReductionPredicate(Kind); Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS, Name); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + return Builder.CreateSelectWithUnknownProfile(Cmp, LHS, RHS, DEBUG_TYPE, + Name); } [[fallthrough]]; case RecurKind::FMax: diff --git a/llvm/lib/XRay/BlockIndexer.cpp b/llvm/lib/XRay/BlockIndexer.cpp index f4ba0eb..d0c6853 100644 --- a/llvm/lib/XRay/BlockIndexer.cpp +++ b/llvm/lib/XRay/BlockIndexer.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/BlockIndexer.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error BlockIndexer::visit(BufferExtents &) { return Error::success(); } @@ -89,6 +89,3 @@ Error BlockIndexer::flush() { CurrentBlock.WallclockTime = nullptr; return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/BlockPrinter.cpp b/llvm/lib/XRay/BlockPrinter.cpp index 63a60c3..d85be5b 100644 --- a/llvm/lib/XRay/BlockPrinter.cpp +++ b/llvm/lib/XRay/BlockPrinter.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/BlockPrinter.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error BlockPrinter::visit(BufferExtents &R) { OS << "\n[New Block]\n"; @@ -108,6 +108,3 @@ Error BlockPrinter::visit(EndBufferRecord &R) { auto E = RP.visit(R); return E; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/BlockVerifier.cpp b/llvm/lib/XRay/BlockVerifier.cpp index 99f255e..e39f6b6 100644 --- a/llvm/lib/XRay/BlockVerifier.cpp +++ b/llvm/lib/XRay/BlockVerifier.cpp @@ -10,19 +10,18 @@ #include <bitset> -namespace llvm { -namespace xray { -namespace { +using namespace llvm; +using namespace llvm::xray; -constexpr unsigned long long 
mask(BlockVerifier::State S) { +static constexpr unsigned long long mask(BlockVerifier::State S) { return 1uLL << static_cast<std::size_t>(S); } -constexpr std::size_t number(BlockVerifier::State S) { +static constexpr std::size_t number(BlockVerifier::State S) { return static_cast<std::size_t>(S); } -StringRef recordToString(BlockVerifier::State R) { +static StringRef recordToString(BlockVerifier::State R) { switch (R) { case BlockVerifier::State::BufferExtents: return "BufferExtents"; @@ -53,6 +52,8 @@ StringRef recordToString(BlockVerifier::State R) { llvm_unreachable("Unknown state!"); } +namespace { + struct Transition { BlockVerifier::State From; std::bitset<number(BlockVerifier::State::StateMax)> ToStates; @@ -133,7 +134,7 @@ Error BlockVerifier::transition(State To) { CurrentRecord = To; return Error::success(); -} // namespace xray +} Error BlockVerifier::visit(BufferExtents &) { return transition(State::BufferExtents); @@ -201,6 +202,3 @@ Error BlockVerifier::verify() { } void BlockVerifier::reset() { CurrentRecord = State::Unknown; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FDRRecordProducer.cpp b/llvm/lib/XRay/FDRRecordProducer.cpp index 479b710..0f4eed1 100644 --- a/llvm/lib/XRay/FDRRecordProducer.cpp +++ b/llvm/lib/XRay/FDRRecordProducer.cpp @@ -10,8 +10,8 @@ #include <cstdint> -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; namespace { @@ -31,8 +31,9 @@ enum MetadataRecordKinds : uint8_t { // This is an end marker, used to identify the upper bound for this enum. EnumEndMarker, }; +} // namespace -Expected<std::unique_ptr<Record>> +static Expected<std::unique_ptr<Record>> metadataRecordType(const XRayFileHeader &Header, uint8_t T) { if (T >= static_cast<uint8_t>(MetadataRecordKinds::EnumEndMarker)) @@ -72,12 +73,10 @@ metadataRecordType(const XRayFileHeader &Header, uint8_t T) { llvm_unreachable("Unhandled MetadataRecordKinds enum value"); } -constexpr bool isMetadataIntroducer(uint8_t FirstByte) { +static constexpr bool isMetadataIntroducer(uint8_t FirstByte) { return FirstByte & 0x01u; } -} // namespace - Expected<std::unique_ptr<Record>> FileBasedRecordProducer::findNextBufferExtent() { // We seek one byte at a time until we find a suitable buffer extents metadata @@ -193,6 +192,3 @@ Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() { assert(R != nullptr); return std::move(R); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FDRRecords.cpp b/llvm/lib/XRay/FDRRecords.cpp index ff315d3..a18f733 100644 --- a/llvm/lib/XRay/FDRRecords.cpp +++ b/llvm/lib/XRay/FDRRecords.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FDRRecords.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error BufferExtents::apply(RecordVisitor &V) { return V.visit(*this); } Error WallclockRecord::apply(RecordVisitor &V) { return V.visit(*this); } @@ -61,6 +61,3 @@ StringRef Record::kindToString(RecordKind K) { } return "Unknown"; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FDRTraceExpander.cpp b/llvm/lib/XRay/FDRTraceExpander.cpp index b68e997..991e6e5 100644 --- a/llvm/lib/XRay/FDRTraceExpander.cpp +++ b/llvm/lib/XRay/FDRTraceExpander.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FDRTraceExpander.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace
llvm::xray; void TraceExpander::resetCurrentRecord() { if (BuildingRecord) @@ -126,6 +126,3 @@ Error TraceExpander::flush() { resetCurrentRecord(); return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FDRTraceWriter.cpp b/llvm/lib/XRay/FDRTraceWriter.cpp index fb59125..3e320a6 100644 --- a/llvm/lib/XRay/FDRTraceWriter.cpp +++ b/llvm/lib/XRay/FDRTraceWriter.cpp @@ -12,8 +12,8 @@ #include "llvm/XRay/FDRTraceWriter.h" #include <tuple> -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; namespace { @@ -37,9 +37,10 @@ template <size_t Index> struct IndexedWriter { return 0; } }; +} // namespace template <uint8_t Kind, class... Values> -Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) { +static Error writeMetadata(support::endian::Writer &OS, Values &&...Ds) { // The first bit in the first byte of metadata records is always set to 1, so // we ensure this is the case when we write out the first byte of the record. uint8_t FirstByte = (static_cast<uint8_t>(Kind) << 1) | uint8_t{0x01u}; @@ -54,8 +55,6 @@ Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) { return Error::success(); } -} // namespace - FDRTraceWriter::FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H) : OS(O, llvm::endianness::native) { // We need to re-construct a header, by writing the fields we care about for @@ -146,6 +145,3 @@ Error FDRTraceWriter::visit(FunctionRecord &R) { OS.write(R.delta()); return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FileHeaderReader.cpp b/llvm/lib/XRay/FileHeaderReader.cpp index 6b6daf9..681cef7 100644 --- a/llvm/lib/XRay/FileHeaderReader.cpp +++ b/llvm/lib/XRay/FileHeaderReader.cpp @@ -7,12 +7,13 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FileHeaderReader.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; // Populates the FileHeader reference by reading the first 32 bytes of the file. -Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor, - uint64_t &OffsetPtr) { +Expected<XRayFileHeader> +xray::readBinaryFormatHeader(DataExtractor &HeaderExtractor, + uint64_t &OffsetPtr) { // FIXME: Maybe deduce whether the data is little or big-endian using some // magic bytes in the beginning of the file? 
@@ -68,6 +69,3 @@ Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor, OffsetPtr += 16; return std::move(FileHeader); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/LogBuilderConsumer.cpp b/llvm/lib/XRay/LogBuilderConsumer.cpp index ffb49f9..f0fc336 100644 --- a/llvm/lib/XRay/LogBuilderConsumer.cpp +++ b/llvm/lib/XRay/LogBuilderConsumer.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FDRRecordConsumer.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error LogBuilderConsumer::consume(std::unique_ptr<Record> R) { if (!R) @@ -32,6 +32,3 @@ Error PipelineConsumer::consume(std::unique_ptr<Record> R) { Result = joinErrors(std::move(Result), R->apply(*V)); return Result; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/Profile.cpp b/llvm/lib/XRay/Profile.cpp index 1b340e5..ecb767b 100644 --- a/llvm/lib/XRay/Profile.cpp +++ b/llvm/lib/XRay/Profile.cpp @@ -18,8 +18,8 @@ #include "llvm/XRay/Trace.h" #include <memory> -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Profile::Profile(const Profile &O) { // We need to re-create all the tries from the original (O), into the current @@ -46,6 +46,7 @@ struct BlockHeader { uint32_t Number; uint64_t Thread; }; +} // namespace static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor, uint64_t &Offset) { @@ -115,8 +116,6 @@ static Expected<Profile::Data> readData(DataExtractor &Extractor, return D; } -} // namespace - Error Profile::addBlock(Block &&B) { if (B.PathData.empty()) return make_error<StringError>( @@ -189,7 +188,7 @@ Profile::PathID Profile::internPath(ArrayRef<FuncID> P) { return Node->ID; } -Profile mergeProfilesByThread(const Profile &L, const Profile &R) { +Profile xray::mergeProfilesByThread(const Profile &L, const Profile &R) { Profile Merged; using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; using PathDataMapPtr = std::unique_ptr<PathDataMap>; @@ -228,7 +227,7 @@ Profile mergeProfilesByThread(const Profile &L, const Profile &R) { return Merged; } -Profile mergeProfilesByStack(const Profile &L, const Profile &R) { +Profile xray::mergeProfilesByStack(const Profile &L, const Profile &R) { Profile Merged; using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; PathDataMap PathData; @@ -258,7 +257,7 @@ Profile mergeProfilesByStack(const Profile &L, const Profile &R) { return Merged; } -Expected<Profile> loadProfile(StringRef Filename) { +Expected<Profile> xray::loadProfile(StringRef Filename) { Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename); if (!FdOrErr) return FdOrErr.takeError(); @@ -322,7 +321,7 @@ struct StackEntry { } // namespace -Expected<Profile> profileFromTrace(const Trace &T) { +Expected<Profile> xray::profileFromTrace(const Trace &T) { Profile P; // The implementation of the algorithm re-creates the execution of @@ -397,6 +396,3 @@ Expected<Profile> profileFromTrace(const Trace &T) { return P; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/RecordInitializer.cpp b/llvm/lib/XRay/RecordInitializer.cpp index 68ab3db..83d5f14 100644 --- a/llvm/lib/XRay/RecordInitializer.cpp +++ b/llvm/lib/XRay/RecordInitializer.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FDRRecords.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace 
llvm::xray; Error RecordInitializer::visit(BufferExtents &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, sizeof(uint64_t))) @@ -426,6 +426,3 @@ Error RecordInitializer::visit(FunctionRecord &R) { assert(FunctionRecord::kFunctionRecordSize == (OffsetPtr - BeginOffset)); return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/RecordPrinter.cpp b/llvm/lib/XRay/RecordPrinter.cpp index 32d4210..b9b7a16 100644 --- a/llvm/lib/XRay/RecordPrinter.cpp +++ b/llvm/lib/XRay/RecordPrinter.cpp @@ -9,8 +9,8 @@ #include "llvm/Support/FormatVariadic.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error RecordPrinter::visit(BufferExtents &R) { OS << formatv("<Buffer: size = {0} bytes>", R.size()) << Delim; @@ -103,6 +103,3 @@ Error RecordPrinter::visit(FunctionRecord &R) { OS << Delim; return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/Trace.cpp b/llvm/lib/XRay/Trace.cpp index 74515b1..14a3f01 100644 --- a/llvm/lib/XRay/Trace.cpp +++ b/llvm/lib/XRay/Trace.cpp @@ -29,11 +29,9 @@ using namespace llvm; using namespace llvm::xray; using llvm::yaml::Input; -namespace { - -Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, - XRayFileHeader &FileHeader, - std::vector<XRayRecord> &Records) { +static Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, + XRayFileHeader &FileHeader, + std::vector<XRayRecord> &Records) { if (Data.size() < 32) return make_error<StringError>( "Not enough bytes for an XRay log.", @@ -265,8 +263,9 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, /// what FunctionRecord instances use, and we no longer need to include the CPU /// id in the CustomEventRecord. /// -Error loadFDRLog(StringRef Data, bool IsLittleEndian, - XRayFileHeader &FileHeader, std::vector<XRayRecord> &Records) { +static Error loadFDRLog(StringRef Data, bool IsLittleEndian, + XRayFileHeader &FileHeader, + std::vector<XRayRecord> &Records) { if (Data.size() < 32) return createStringError(std::make_error_code(std::errc::invalid_argument), @@ -348,8 +347,8 @@ Error loadFDRLog(StringRef Data, bool IsLittleEndian, return Error::success(); } -Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader, - std::vector<XRayRecord> &Records) { +static Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader, + std::vector<XRayRecord> &Records) { YAMLXRayTrace Trace; Input In(Data); In >> Trace; @@ -376,7 +375,6 @@ Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader, }); return Error::success(); } -} // namespace Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) { Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename); diff --git a/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir b/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir new file mode 100644 index 0000000..a4aad57 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir @@ -0,0 +1,59 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- +name: buffer_load_lds_not_valu +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: buffer_load_lds_not_valu + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + 
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[V_ADD_U32_e32_]], implicit $exec + ; CHECK-NEXT: $m0 = S_MOV_B32 0 + ; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0 + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], implicit $exec + ; CHECK-NEXT: $m0 = S_MOV_B32 1 + ; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0 + ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_3]], [[V_ADD_U32_e32_4]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]], implicit $exec + ; CHECK-NEXT: dead [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_5]], [[V_ADD_U32_e32_6]], implicit $exec + ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0 + ; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0 + ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0 + ; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0 + ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 4, 0 + ; CHECK-NEXT: S_ENDPGM 0 + $exec = IMPLICIT_DEF + %0:vgpr_32 = IMPLICIT_DEF + %1:sgpr_128 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec + %5:vgpr_32 = V_ADD_U32_e32 %3, %4, implicit $exec + $m0 = S_MOV_B32 0 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0 + $m0 = S_MOV_B32 1 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0 + %6:vgpr_32 = V_ADD_U32_e32 %4, %5, implicit $exec + %7:vgpr_32 = V_ADD_U32_e32 %5, %6, implicit $exec + %8:vgpr_32 = V_ADD_U32_e32 %6, %7, implicit $exec + %9:vgpr_32 = V_ADD_U32_e32 %7, %8, implicit $exec + %10:vgpr_32 = V_ADD_U32_e32 %8, %9, implicit $exec + %11:vgpr_32 = V_ADD_U32_e32 %9, %10, implicit $exec + SCHED_GROUP_BARRIER 2, 2, 0 + SCHED_GROUP_BARRIER 4, 1, 0 + SCHED_GROUP_BARRIER 2, 2, 0 + SCHED_GROUP_BARRIER 4, 1, 0 + SCHED_GROUP_BARRIER 2, 4, 0 + S_ENDPGM 0 +...
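For readers decoding the barriers in the test above: SCHED_GROUP_BARRIER takes (mask, size, syncID), where mask 2 matches VALU instructions and mask 4 matches SALU instructions, so the sequence pins groups of two VALU adds separated by single SALU moves; the point of the test, per its name, is that BUFFER_LOAD_DWORDX4_LDS_OFFEN is not classified as VALU and therefore is not pulled into the VALU groups. At the source level the same pipelining is requested through the AMDGPU scheduling builtin; a sketch assuming a HIP-style device function (the builtin and the mask values follow the llvm.amdgcn.sched.group.barrier documentation, but the fragment as a whole is illustrative only):

    // Illustrative device-side fragment; only the builtin calls are load-bearing.
    // Mask bits: 0x2 = VALU, 0x4 = SALU; size = instructions per group; the last
    // operand is the sync-group ID shared by barriers forming one pipeline.
    __device__ void pipelineHint() {
      __builtin_amdgcn_sched_group_barrier(0x2, 2, 0); // group of 2 VALU ops
      __builtin_amdgcn_sched_group_barrier(0x4, 1, 0); // group of 1 SALU op
      __builtin_amdgcn_sched_group_barrier(0x2, 2, 0); // group of 2 VALU ops
      __builtin_amdgcn_sched_group_barrier(0x4, 1, 0); // group of 1 SALU op
      __builtin_amdgcn_sched_group_barrier(0x2, 4, 0); // trailing 4 VALU ops
    }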
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir b/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir new file mode 100644 index 0000000..389283a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir @@ -0,0 +1,523 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v \ +# RUN: -run-pass=phi-node-elimination,register-coalescer,riscv-insert-vsetvli | FileCheck %s + +--- | + define void @xsfmm_same_state(<vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 noundef %tm, i64 noundef %tn, i64 noundef %tk) { + entry: + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2) + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2) + ret void + } + + define void @xsfmm_different_state(<vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk) { + entry: + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2) + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 4) + ret void + } + + define void @xsfmm_different_state_bf(<vscale x 32 x half> %tile1, <vscale x 32 x bfloat> %tile2, i64 %tm, i64 %tn, i64 %tk) { + entry: + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile1, i64 %tm, i64 %tn, i64 %tk, i64 2) + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32bf16(i64 2, <vscale x 32 x bfloat> %tile2, <vscale x 32 x bfloat> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2) + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile1, i64 %tm, i64 %tn, i64 %tk, i64 2) + ret void + } + + define <vscale x 64 x i8> @interleave_rvv_and_xsfmm(<vscale x 64 x i8> %tile, i64 %vl, ptr %base) { + entry: + %0 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64 1, i64 %vl) + %1 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %0, i64 %vl) + call void @llvm.riscv.sf.vste16.i64(i64 1, ptr %base, i64 %vl) + ret <vscale x 64 x i8> %1 + } + + define <vscale x 64 x i8> @interleave_rvv_and_xsfmm2(<vscale x 64 x i8> %tile, i64 %vl, ptr %base) { + entry: + %0 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %tile, i64 %vl) + %1 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64 1, i64 %vl) + %2 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %0, i64 %vl) + call void @llvm.riscv.sf.vste16.i64(i64 1, ptr %base, i64 %vl) + ret <vscale x 64 x i8> %2 + } + + define void @consecutive_xsfmm(<vscale x 32 x half> %tile, i64 %tm, i64 %tn, i64 %tk, ptr %base) { + entry: + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 0, <vscale x 32 x half> %tile, <vscale x 32 x half> %tile, i64 %tm, i64 %tn, i64 %tk, i64 2) + call void @llvm.riscv.sf.vste16.i64(i64 0, ptr %base, i64 %tn) + ret void + } + + define i64 @vsettnt_max(i64 %vl) { + entry: + %0 = call i64 @llvm.riscv.sf.vsettm.i64(i64 %vl, i64 1, i64 2) + %1 = call i64 
@llvm.riscv.sf.vsettnt_max.i64(i64 1, i64 2) + ret i64 %0 + } + + define i64 @single_vsettm(i64 %vl) { + entry: + %0 = call i64 @llvm.riscv.sf.vsettm.i64(i64 %vl, i64 1, i64 2) + ret i64 %0 + } + + define i64 @single_vsettn(i64 %vl) { + entry: + %0 = call i64 @llvm.riscv.sf.vsettn.i64(i64 %vl, i64 1, i64 2) + ret i64 %0 + } + + define i64 @single_vsettk(i64 %vl) { + entry: + %0 = call i64 @llvm.riscv.sf.vsettk.i64(i64 %vl, i64 1, i64 2) + ret i64 %0 + } + + define void @sf_vtzero(i64 %tm, i64 %tn) { + entry: + call void @llvm.riscv.sf.vtzero.i64(i64 1, i64 %tm, i64 %tn, i64 3, i64 4) + ret void + } + + declare void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64, <vscale x 32 x half>, <vscale x 32 x half>, i64, i64, i64, i64) + declare void @llvm.riscv.sf.mm.f.f.i64.nxv32bf16(i64, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i64, i64, i64, i64) + declare <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64, i64) + declare <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i8>, i64) + declare void @llvm.riscv.sf.vste16.i64(i64, ptr, i64) + declare i64 @llvm.riscv.sf.vsettnt_max.i64(i64, i64) + declare i64 @llvm.riscv.sf.vsettm.i64(i64, i64, i64) + declare i64 @llvm.riscv.sf.vsettn.i64(i64, i64, i64) + declare i64 @llvm.riscv.sf.vsettk.i64(i64, i64, i64) + declare void @llvm.riscv.sf.vtzero.i64(i64, i64, i64, i64, i64) +... +--- +name: xsfmm_same_state +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: vrm8 } + - { id: 2, class: gprnox0 } + - { id: 3, class: gprnox0 } + - { id: 4, class: gprnox0 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$v8m8', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } + - { reg: '$x11', virtual-reg: '%3' } + - { reg: '$x12', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-LABEL: name: xsfmm_same_state + ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %4:gprnox0 = COPY $x12 + %3:gprnox0 = COPY $x11 + %2:gprnox0 = COPY $x10 + %1:vrm8 = COPY $v16m8 + %0:vrm8 = COPY $v8m8 + PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoRET +... 
+--- +name: xsfmm_different_state +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: vrm8 } + - { id: 2, class: gprnox0 } + - { id: 3, class: gprnox0 } + - { id: 4, class: gprnox0 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$v8m8', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } + - { reg: '$x11', virtual-reg: '%3' } + - { reg: '$x12', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-LABEL: name: xsfmm_different_state + ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1544 /* e16, w4 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 3, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 3, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 4, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %4:gprnox0 = COPY $x12 + %3:gprnox0 = COPY $x11 + %2:gprnox0 = COPY $x10 + %1:vrm8 = COPY $v16m8 + %0:vrm8 = COPY $v8m8 + PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 4, implicit $frm + PseudoRET +... 
+--- +name: xsfmm_different_state_bf +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: vrm8 } + - { id: 2, class: gprnox0 } + - { id: 3, class: gprnox0 } + - { id: 4, class: gprnox0 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$v8m8', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } + - { reg: '$x11', virtual-reg: '%3' } + - { reg: '$x12', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-LABEL: name: xsfmm_different_state_bf + ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY4]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1288 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F_ALT $t2, [[COPY3]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY4]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %4:gprnox0 = COPY $x12 + %3:gprnox0 = COPY $x11 + %2:gprnox0 = COPY $x10 + %1:vrm8 = COPY $v16m8 + %0:vrm8 = COPY $v8m8 + PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoSF_MM_F_F_ALT $t2, %1:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoRET +... 
+--- +name: interleave_rvv_and_xsfmm +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: gprnox0 } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: vrm8 } + - { id: 5, class: vrm8 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$x10', virtual-reg: '%1' } + - { reg: '$x11', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $x10, $x11 + ; CHECK-LABEL: name: interleave_rvv_and_xsfmm + ; CHECK: liveins: $v8m8, $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 512 /* e8, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoSF_VTMV_V_T:%[0-9]+]]:vrm8 = PseudoSF_VTMV_V_T [[ADDI]], $noreg, 3, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[COPY2]], [[PseudoSF_VTMV_V_T]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: PseudoSF_VSTE16 [[ADDI]], [[COPY]], $noreg, 4, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v8m8 = COPY [[PseudoVADD_VV_M8_]], implicit $vtype + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %2:gpr = COPY $x11 + %1:gprnox0 = COPY $x10 + %0:vrm8 = COPY $v8m8 + %3:gpr = ADDI $x0, 1 + %4:vrm8 = PseudoSF_VTMV_V_T %3:gpr, %1:gprnox0, 3, 1 + %5:vrm8 = PseudoVADD_VV_M8 $noreg, %0:vrm8, killed %4:vrm8, %1:gprnox0, 3, 0 + PseudoSF_VSTE16 %3:gpr, %2:gpr, %1:gprnox0, 4, 1 + $v8m8 = COPY %5:vrm8 + PseudoRET implicit $v8m8 +... 
+--- +name: interleave_rvv_and_xsfmm2 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: gprnox0 } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: vrm8 } + - { id: 5, class: vrm8 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$x10', virtual-reg: '%1' } + - { reg: '$x11', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $x10, $x11 + ; CHECK-LABEL: name: interleave_rvv_and_xsfmm2 + ; CHECK: liveins: $v8m8, $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[COPY2]], [[COPY2]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 512 /* e8, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead [[PseudoSF_VTMV_V_T:%[0-9]+]]:vrm8 = PseudoSF_VTMV_V_T [[ADDI]], $noreg, 3, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M8_1:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[PseudoVADD_VV_M8_]], [[PseudoVADD_VV_M8_]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: PseudoSF_VSTE16 [[ADDI]], [[COPY]], $noreg, 4, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v8m8 = COPY [[PseudoVADD_VV_M8_1]], implicit $vtype + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %2:gpr = COPY $x11 + %1:gprnox0 = COPY $x10 + %0:vrm8 = COPY $v8m8 + %3:gpr = ADDI $x0, 1 + %4:vrm8 = PseudoVADD_VV_M8 $noreg, %0:vrm8, killed %0:vrm8, %1:gprnox0, 3, 0 + %5:vrm8 = PseudoSF_VTMV_V_T %3:gpr, %1:gprnox0, 3, 1 + %6:vrm8 = PseudoVADD_VV_M8 $noreg, %4:vrm8, killed %4:vrm8, %1:gprnox0, 3, 0 + PseudoSF_VSTE16 %3:gpr, %2:gpr, %1:gprnox0, 4, 1 + $v8m8 = COPY %6:vrm8 + PseudoRET implicit $v8m8 +... 
+--- +name: consecutive_xsfmm +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: gprnox0 } + - { id: 2, class: gprnox0 } + - { id: 3, class: gprnox0 } + - { id: 4, class: gprnox0 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$x10', virtual-reg: '%1' } + - { reg: '$x11', virtual-reg: '%2' } + - { reg: '$x12', virtual-reg: '%3' } + - { reg: '$x13', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $x10, $x11, $x12, $x13 + ; CHECK-LABEL: name: consecutive_xsfmm + ; CHECK: liveins: $v8m8, $x10, $x11, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprnox0 = COPY $x12 + ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:gprnox0 = COPY $x13 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY2]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY1]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY3]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY]], [[COPY]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY3]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: PseudoSF_VSTE16 [[COPY1]], [[COPY2]], $noreg, 4, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %0:vrm8 = COPY $v8m8 + %1:gprnox0 = COPY $x10 + %2:gprnox0 = COPY $x11 + %3:gprnox0 = COPY $x12 + %4:gprnox0 = COPY $x13 + PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %1:gprnox0, %2:gprnox0, %3:gprnox0, 4, 2, implicit $frm + PseudoSF_VSTE16 %1:gprnox0, %2:gprnox0, %3:gprnox0, 4, 1 + PseudoRET +... +--- +name: vsettnt_max +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: vsettnt_max + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 killed $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead [[PseudoSF_VSETTK:%[0-9]+]]:gprnox0 = PseudoSF_VSETTK [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype + ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_1:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: [[PseudoSF_VSETTM:%[0-9]+]]:gprnox0 = PseudoSF_VSETTM [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype + ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTM]] + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:gprnox0 = COPY $x10 + %1:gprnox0 = PseudoSF_VSETTK %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype + %2:gprnox0 = PseudoSF_VSETTNTX0 $x0, 520, implicit-def $vl, implicit-def $vtype, implicit $vtype + %3:gprnox0 = PseudoSF_VSETTM %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype + $x10 = COPY %3:gprnox0 + PseudoRET implicit $x10 +... 
+--- +name: single_vsettm +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: single_vsettm + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 killed $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoSF_VSETTM:%[0-9]+]]:gprnox0 = PseudoSF_VSETTM [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype + ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTM]] + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:gprnox0 = COPY $x10 + %1:gprnox0 = PseudoSF_VSETTM %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype + $x10 = COPY %1:gprnox0 + PseudoRET implicit $x10 +... +--- +name: single_vsettn +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: single_vsettn + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[PseudoSF_VSETTNT:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNT [[COPY]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTNT]] + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:gprnox0 = COPY $x10 + %1:gprnox0 = PseudoSF_VSETTNT %0:gprnox0, 520, implicit-def $vl, implicit-def $vtype, implicit $vtype + $x10 = COPY %1:gprnox0 + PseudoRET implicit $x10 +... +--- +name: single_vsettk +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: single_vsettk + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 killed $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoSF_VSETTK:%[0-9]+]]:gprnox0 = PseudoSF_VSETTK [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype + ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTK]] + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:gprnox0 = COPY $x10 + %1:gprnox0 = PseudoSF_VSETTK %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype + $x10 = COPY %1:gprnox0 + PseudoRET implicit $x10 +... 
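Taken together, single_vsettm, single_vsettn and single_vsettk pin down one asymmetry: a lone sf.vsettnt already defines a complete tile configuration and is emitted as-is, while a lone sf.vsettm or sf.vsettk appears to update only the TM or TK portion of the state, so the pass first materializes a baseline with PseudoSF_VSETTNTX0 ($x0 as the tn operand, by analogy with vsetvl's x0 meaning the maximum). A tiny sketch of that rule as read off the checks, with the preamble immediate 520 /* e16, w1 */ copied from them; the "patches a single field" rationale is an inference, not documented behavior.

#include <cstdio>
#include <cstring>
#include <initializer_list>

// Which standalone XSfmm config pseudos get a full-state preamble first.
static bool needsPreamble(const char *Pseudo) {
  // VSETTM/VSETTK update one field; VSETTNT(X0) defines the whole state.
  return std::strcmp(Pseudo, "PseudoSF_VSETTM") == 0 ||
         std::strcmp(Pseudo, "PseudoSF_VSETTK") == 0;
}

int main() {
  for (const char *P :
       {"PseudoSF_VSETTM", "PseudoSF_VSETTNT", "PseudoSF_VSETTK"}) {
    if (needsPreamble(P))
      std::printf("insert PseudoSF_VSETTNTX0 $x0, 520 /* e16, w1 */\n");
    std::printf("%s\n", P);
  }
}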
+--- +name: sf_vtzero +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } + - { id: 1, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10, $x11 + ; CHECK-LABEL: name: sf_vtzero + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1536 /* e8, w4 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY]], 3, 3, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_VTZERO_T $t1, $noreg, $noreg, 3, 4, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %0:gprnox0 = COPY $x10 + %1:gprnox0 = COPY $x11 + PseudoSF_VTZERO_T $t1, %0:gprnox0, %1:gprnox0, 3, 4 + PseudoRET +... diff --git a/llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll new file mode 100644 index 0000000..3654aae --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mattr=+simd128 | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +define <4 x i32> @dot_sext_1(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: dot_sext_1: +; CHECK: .functype dot_sext_1 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.dot_i16x8_s +; CHECK-NEXT: # fallthrough-return + %sext1 = sext <8 x i16> %a to <8 x i32> + %sext2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %sext1, %sext2 + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res = add <4 x i32> %shuffle1, %shuffle2 + ret <4 x i32> %res +} + + +define <4 x i32> @dot_sext_2(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: dot_sext_2: +; CHECK: .functype dot_sext_2 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.dot_i16x8_s +; CHECK-NEXT: # fallthrough-return + %sext1 = sext <8 x i16> %a to <8 x i32> + %sext2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %sext1, %sext2 + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res = add <4 x i32> %shuffle2, %shuffle1 + ret <4 x i32> %res +} + +define <4 x i32> @dot_sext_self(<8 x i16> %v) { +; CHECK-LABEL: dot_sext_self: +; CHECK: .functype dot_sext_self (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.dot_i16x8_s +; CHECK-NEXT: # fallthrough-return + %sext = sext <8 x i16> %v to <8 x i32> + %mul = mul <8 x i32> %sext, %sext + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res = add <4 x i32> %shuffle1, %shuffle2 + ret <4 x i32> %res +} + +; INFO: Negative test +define <4 x i32> @dot_zext(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: dot_zext: +; CHECK: .functype dot_zext (v128, 
v128) -> (v128) +; CHECK-NEXT: .local v128 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.extmul_low_i16x8_u +; CHECK-NEXT: local.tee 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.extmul_high_i16x8_u +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK-NEXT: i32x4.add +; CHECK-NEXT: # fallthrough-return + %zext1 = zext <8 x i16> %a to <8 x i32> + %zext2 = zext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %zext1, %zext2 + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res = add <4 x i32> %shuffle1, %shuffle2 + ret <4 x i32> %res +} + +; INFO: Negative test +define <4 x i32> @dot_wrong_shuffle(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: dot_wrong_shuffle: +; CHECK: .functype dot_wrong_shuffle (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.extmul_low_i16x8_s +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.extmul_high_i16x8_s +; CHECK-NEXT: i32x4.add +; CHECK-NEXT: # fallthrough-return + %sext1 = sext <8 x i16> %a to <8 x i32> + %sext2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %sext1, %sext2 + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %res = add <4 x i32> %shuffle1, %shuffle2 + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll index e065de3..600241a 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll @@ -2,9 +2,278 @@ ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128, | FileCheck %s --check-prefix=STRICT +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=NOFP16 +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefix=NOSIMD target triple = "wasm32" +define half @fadd_fmul_contract_f16(half %a, half %b, half %c) { +; RELAXED-LABEL: fadd_fmul_contract_f16: +; RELAXED: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: call $push0=, __truncsfhf2, $0 +; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0 +; RELAXED-NEXT: call $push2=, __truncsfhf2, $1 +; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2 +; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3 +; RELAXED-NEXT: call $push5=, __truncsfhf2, $2 +; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5 +; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6 +; RELAXED-NEXT: return $pop7 +; +; STRICT-LABEL: fadd_fmul_contract_f16: +; STRICT: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: 
+; STRICT-NEXT: call $push0=, __truncsfhf2, $0 +; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0 +; STRICT-NEXT: call $push2=, __truncsfhf2, $1 +; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2 +; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3 +; STRICT-NEXT: call $push5=, __truncsfhf2, $2 +; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5 +; STRICT-NEXT: f32.add $push7=, $pop4, $pop6 +; STRICT-NEXT: return $pop7 +; +; NOFP16-LABEL: fadd_fmul_contract_f16: +; NOFP16: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $0 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: return $pop7 +; +; NOSIMD-LABEL: fadd_fmul_contract_f16: +; NOSIMD: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $0 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: return $pop7 + %mul = fmul contract half %b, %a + %add = fadd contract half %mul, %c + ret half %add +} + +define half @fmuladd_contract_f16(half %a, half %b, half %c) { +; RELAXED-LABEL: fmuladd_contract_f16: +; RELAXED: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: call $push0=, __truncsfhf2, $1 +; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0 +; RELAXED-NEXT: call $push2=, __truncsfhf2, $0 +; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2 +; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3 +; RELAXED-NEXT: call $push5=, __truncsfhf2, $2 +; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5 +; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6 +; RELAXED-NEXT: return $pop7 +; +; STRICT-LABEL: fmuladd_contract_f16: +; STRICT: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: call $push0=, __truncsfhf2, $1 +; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0 +; STRICT-NEXT: call $push2=, __truncsfhf2, $0 +; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2 +; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3 +; STRICT-NEXT: call $push5=, __truncsfhf2, $2 +; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5 +; STRICT-NEXT: f32.add $push7=, $pop4, $pop6 +; STRICT-NEXT: return $pop7 +; +; NOFP16-LABEL: fmuladd_contract_f16: +; NOFP16: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $0 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: return $pop7 +; +; NOSIMD-LABEL: fmuladd_contract_f16: +; NOSIMD: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push1=, 
__extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: return $pop7 + %fma = call contract half @llvm.fmuladd(half %a, half %b, half %c) + ret half %fma +} + +define half @fmuladd_f16(half %a, half %b, half %c) { +; RELAXED-LABEL: fmuladd_f16: +; RELAXED: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: call $push0=, __truncsfhf2, $1 +; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0 +; RELAXED-NEXT: call $push2=, __truncsfhf2, $0 +; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2 +; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3 +; RELAXED-NEXT: call $push5=, __truncsfhf2, $2 +; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5 +; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6 +; RELAXED-NEXT: return $pop7 +; +; STRICT-LABEL: fmuladd_f16: +; STRICT: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: call $push0=, __truncsfhf2, $1 +; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0 +; STRICT-NEXT: call $push2=, __truncsfhf2, $0 +; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2 +; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3 +; STRICT-NEXT: call $push5=, __truncsfhf2, $2 +; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5 +; STRICT-NEXT: f32.add $push7=, $pop4, $pop6 +; STRICT-NEXT: return $pop7 +; +; NOFP16-LABEL: fmuladd_f16: +; NOFP16: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $0 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: return $pop7 +; +; NOSIMD-LABEL: fmuladd_f16: +; NOSIMD: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: return $pop7 + %fma = call half @llvm.fmuladd(half %a, half %b, half %c) + ret half %fma +} + + +define float @fadd_fmul_contract_f32(float %a, float %b, float %c) { +; RELAXED-LABEL: fadd_fmul_contract_f32: +; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32.mul $push0=, $1, $0 +; RELAXED-NEXT: f32.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fadd_fmul_contract_f32: +; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32.mul $push0=, $1, $0 +; STRICT-NEXT: f32.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_contract_f32: +; NOFP16: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32.mul $push0=, $1, $0 +; NOFP16-NEXT: f32.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: 
fadd_fmul_contract_f32: +; NOSIMD: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $1, $0 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %mul = fmul contract float %b, %a + %add = fadd contract float %mul, %c + ret float %add +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) { +; RELAXED-LABEL: fmuladd_contract_f32: +; RELAXED: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32.mul $push0=, $0, $1 +; RELAXED-NEXT: f32.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fmuladd_contract_f32: +; STRICT: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32.mul $push0=, $0, $1 +; STRICT-NEXT: f32.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_contract_f32: +; NOFP16: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32.mul $push0=, $0, $1 +; NOFP16-NEXT: f32.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_contract_f32: +; NOSIMD: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $0, $1 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %fma = call contract float @llvm.fmuladd(float %a, float %b, float %c) + ret float %fma +} + +define float @fmuladd_f32(float %a, float %b, float %c) { +; RELAXED-LABEL: fmuladd_f32: +; RELAXED: .functype fmuladd_f32 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32.mul $push0=, $0, $1 +; RELAXED-NEXT: f32.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fmuladd_f32: +; STRICT: .functype fmuladd_f32 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32.mul $push0=, $0, $1 +; STRICT-NEXT: f32.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_f32: +; NOFP16: .functype fmuladd_f32 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32.mul $push0=, $0, $1 +; NOFP16-NEXT: f32.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_f32: +; NOSIMD: .functype fmuladd_f32 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $0, $1 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %fma = call float @llvm.fmuladd(float %a, float %b, float %c) + ret float %fma +} + define double @fadd_fmul_contract_f64(double %a, double %b, double %c) { ; RELAXED-LABEL: fadd_fmul_contract_f64: ; RELAXED: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64) @@ -19,16 +288,94 @@ define double @fadd_fmul_contract_f64(double %a, double %b, double %c) { ; STRICT-NEXT: f64.mul $push0=, $1, $0 ; STRICT-NEXT: f64.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_contract_f64: +; NOFP16: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64.mul $push0=, $1, $0 +; NOFP16-NEXT: f64.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_contract_f64: +; NOSIMD: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $1, $0 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 %mul = fmul contract double %b, %a %add = fadd contract double %mul, %c ret double %add } +define double @fmuladd_f64(double %a, 
double %b, double %c) { +; RELAXED-LABEL: fmuladd_f64: +; RELAXED: .functype fmuladd_f64 (f64, f64, f64) -> (f64) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64.mul $push0=, $0, $1 +; RELAXED-NEXT: f64.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fmuladd_f64: +; STRICT: .functype fmuladd_f64 (f64, f64, f64) -> (f64) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64.mul $push0=, $0, $1 +; STRICT-NEXT: f64.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_f64: +; NOFP16: .functype fmuladd_f64 (f64, f64, f64) -> (f64) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64.mul $push0=, $0, $1 +; NOFP16-NEXT: f64.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_f64: +; NOSIMD: .functype fmuladd_f64 (f64, f64, f64) -> (f64) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $0, $1 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %fma = call double @llvm.fmuladd(double %a, double %b, double %c) + ret double %fma +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) { +; RELAXED-LABEL: fmuladd_contract_f64: +; RELAXED: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64.mul $push0=, $0, $1 +; RELAXED-NEXT: f64.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fmuladd_contract_f64: +; STRICT: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64.mul $push0=, $0, $1 +; STRICT-NEXT: f64.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_contract_f64: +; NOFP16: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64.mul $push0=, $0, $1 +; NOFP16-NEXT: f64.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_contract_f64: +; NOSIMD: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $0, $1 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %fma = call contract double @llvm.fmuladd(double %a, double %b, double %c) + ret double %fma +} + define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fadd_fmul_contract_4xf32: ; RELAXED: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $1, $0 +; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fadd_fmul_contract_4xf32: @@ -37,31 +384,222 @@ define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 ; STRICT-NEXT: f32x4.mul $push0=, $1, $0 ; STRICT-NEXT: f32x4.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_contract_4xf32: +; NOFP16: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $1, $0 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_contract_4xf32: +; NOSIMD: .functype fadd_fmul_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $8, $4 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $7, $3 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $11 +; NOSIMD-NEXT: f32.store 8($0), $pop3 +; NOSIMD-NEXT: f32.mul 
$push4=, $6, $2 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $5, $1 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop7 +; NOSIMD-NEXT: return %mul = fmul contract <4 x float> %b, %a %add = fadd contract <4 x float> %mul, %c ret <4 x float> %add } - define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; RELAXED-LABEL: fadd_fmul_contract_8xf16: ; RELAXED: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $1, $0 +; RELAXED-NEXT: f16x8.madd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fadd_fmul_contract_8xf16: ; STRICT: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128) ; STRICT-NEXT: # %bb.0: -; STRICT-NEXT: f16x8.mul $push0=, $1, $0 -; STRICT-NEXT: f16x8.add $push1=, $pop0, $2 -; STRICT-NEXT: return $pop1 +; STRICT-NEXT: f16x8.madd $push0=, $1, $0, $2 +; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fadd_fmul_contract_8xf16: +; NOFP16: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $8 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $16 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $24 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOFP16-NEXT: i32.store16 14($0), $pop8 +; NOFP16-NEXT: call $push9=, __truncsfhf2, $7 +; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOFP16-NEXT: call $push11=, __truncsfhf2, $15 +; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOFP16-NEXT: call $push14=, __truncsfhf2, $23 +; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15 +; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOFP16-NEXT: i32.store16 12($0), $pop17 +; NOFP16-NEXT: call $push18=, __truncsfhf2, $6 +; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOFP16-NEXT: call $push20=, __truncsfhf2, $14 +; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOFP16-NEXT: call $push23=, __truncsfhf2, $22 +; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24 +; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOFP16-NEXT: i32.store16 10($0), $pop26 +; NOFP16-NEXT: call $push27=, __truncsfhf2, $5 +; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOFP16-NEXT: call $push29=, __truncsfhf2, $13 +; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOFP16-NEXT: call $push32=, __truncsfhf2, $21 +; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33 +; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOFP16-NEXT: i32.store16 8($0), $pop35 +; NOFP16-NEXT: call $push36=, __truncsfhf2, $4 +; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOFP16-NEXT: call $push38=, __truncsfhf2, $12 +; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOFP16-NEXT: call $push41=, 
__truncsfhf2, $20 +; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42 +; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOFP16-NEXT: i32.store16 6($0), $pop44 +; NOFP16-NEXT: call $push45=, __truncsfhf2, $3 +; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOFP16-NEXT: call $push47=, __truncsfhf2, $11 +; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOFP16-NEXT: call $push50=, __truncsfhf2, $19 +; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51 +; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOFP16-NEXT: i32.store16 4($0), $pop53 +; NOFP16-NEXT: call $push54=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOFP16-NEXT: call $push56=, __truncsfhf2, $10 +; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOFP16-NEXT: call $push59=, __truncsfhf2, $18 +; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60 +; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOFP16-NEXT: i32.store16 2($0), $pop62 +; NOFP16-NEXT: call $push63=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOFP16-NEXT: call $push65=, __truncsfhf2, $9 +; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOFP16-NEXT: call $push68=, __truncsfhf2, $17 +; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69 +; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOFP16-NEXT: i32.store16 0($0), $pop71 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: fadd_fmul_contract_8xf16: +; NOSIMD: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $8 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $16 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOSIMD-NEXT: i32.store16 14($0), $pop8 +; NOSIMD-NEXT: call $push9=, __truncsfhf2, $7 +; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOSIMD-NEXT: call $push11=, __truncsfhf2, $15 +; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23 +; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15 +; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOSIMD-NEXT: i32.store16 12($0), $pop17 +; NOSIMD-NEXT: call $push18=, __truncsfhf2, $6 +; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOSIMD-NEXT: call $push20=, __truncsfhf2, $14 +; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22 +; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24 +; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOSIMD-NEXT: i32.store16 10($0), $pop26 +; NOSIMD-NEXT: call $push27=, __truncsfhf2, $5 +; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27 +; 
NOSIMD-NEXT: call $push29=, __truncsfhf2, $13 +; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21 +; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33 +; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOSIMD-NEXT: i32.store16 8($0), $pop35 +; NOSIMD-NEXT: call $push36=, __truncsfhf2, $4 +; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOSIMD-NEXT: call $push38=, __truncsfhf2, $12 +; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20 +; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42 +; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOSIMD-NEXT: i32.store16 6($0), $pop44 +; NOSIMD-NEXT: call $push45=, __truncsfhf2, $3 +; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOSIMD-NEXT: call $push47=, __truncsfhf2, $11 +; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19 +; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51 +; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOSIMD-NEXT: i32.store16 4($0), $pop53 +; NOSIMD-NEXT: call $push54=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOSIMD-NEXT: call $push56=, __truncsfhf2, $10 +; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18 +; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60 +; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOSIMD-NEXT: i32.store16 2($0), $pop62 +; NOSIMD-NEXT: call $push63=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOSIMD-NEXT: call $push65=, __truncsfhf2, $9 +; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17 +; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69 +; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOSIMD-NEXT: i32.store16 0($0), $pop71 +; NOSIMD-NEXT: return %mul = fmul contract <8 x half> %b, %a %add = fadd contract <8 x half> %mul, %c ret <8 x half> %add } - define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fadd_fmul_4xf32: ; RELAXED: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128) @@ -76,16 +614,412 @@ define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> ; STRICT-NEXT: f32x4.mul $push0=, $1, $0 ; STRICT-NEXT: f32x4.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_4xf32: +; NOFP16: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $1, $0 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_4xf32: +; NOSIMD: .functype fadd_fmul_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $8, $4 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $7, $3 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $11 +; NOSIMD-NEXT: 
f32.store 8($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $6, $2 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $5, $1 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop7 +; NOSIMD-NEXT: return %mul = fmul <4 x float> %b, %a %add = fadd contract <4 x float> %mul, %c ret <4 x float> %add } +define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; RELAXED-LABEL: fmuladd_contract_8xf16: +; RELAXED: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_contract_8xf16: +; STRICT: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2 +; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fmuladd_contract_8xf16: +; NOFP16: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $16 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $8 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $24 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOFP16-NEXT: i32.store16 14($0), $pop8 +; NOFP16-NEXT: call $push9=, __truncsfhf2, $15 +; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOFP16-NEXT: call $push11=, __truncsfhf2, $7 +; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOFP16-NEXT: call $push14=, __truncsfhf2, $23 +; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15 +; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOFP16-NEXT: i32.store16 12($0), $pop17 +; NOFP16-NEXT: call $push18=, __truncsfhf2, $14 +; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOFP16-NEXT: call $push20=, __truncsfhf2, $6 +; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOFP16-NEXT: call $push23=, __truncsfhf2, $22 +; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24 +; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOFP16-NEXT: i32.store16 10($0), $pop26 +; NOFP16-NEXT: call $push27=, __truncsfhf2, $13 +; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOFP16-NEXT: call $push29=, __truncsfhf2, $5 +; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOFP16-NEXT: call $push32=, __truncsfhf2, $21 +; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33 +; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOFP16-NEXT: i32.store16 8($0), $pop35 +; NOFP16-NEXT: call $push36=, __truncsfhf2, $12 +; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOFP16-NEXT: call $push38=, __truncsfhf2, $4 +; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOFP16-NEXT: call $push41=, __truncsfhf2, $20 +; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42 +; NOFP16-NEXT: call 
$push44=, __truncsfhf2, $pop43 +; NOFP16-NEXT: i32.store16 6($0), $pop44 +; NOFP16-NEXT: call $push45=, __truncsfhf2, $11 +; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOFP16-NEXT: call $push47=, __truncsfhf2, $3 +; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOFP16-NEXT: call $push50=, __truncsfhf2, $19 +; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51 +; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOFP16-NEXT: i32.store16 4($0), $pop53 +; NOFP16-NEXT: call $push54=, __truncsfhf2, $10 +; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOFP16-NEXT: call $push56=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOFP16-NEXT: call $push59=, __truncsfhf2, $18 +; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60 +; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOFP16-NEXT: i32.store16 2($0), $pop62 +; NOFP16-NEXT: call $push63=, __truncsfhf2, $9 +; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOFP16-NEXT: call $push65=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOFP16-NEXT: call $push68=, __truncsfhf2, $17 +; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69 +; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOFP16-NEXT: i32.store16 0($0), $pop71 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: fmuladd_contract_8xf16: +; NOSIMD: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOSIMD-NEXT: i32.store16 14($0), $pop8 +; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15 +; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7 +; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23 +; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15 +; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOSIMD-NEXT: i32.store16 12($0), $pop17 +; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14 +; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6 +; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22 +; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24 +; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOSIMD-NEXT: i32.store16 10($0), $pop26 +; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13 +; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5 +; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOSIMD-NEXT: f32.mul $push31=, $pop28, 
$pop30 +; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21 +; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33 +; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOSIMD-NEXT: i32.store16 8($0), $pop35 +; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12 +; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4 +; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20 +; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42 +; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOSIMD-NEXT: i32.store16 6($0), $pop44 +; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11 +; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3 +; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19 +; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51 +; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOSIMD-NEXT: i32.store16 4($0), $pop53 +; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10 +; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18 +; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60 +; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOSIMD-NEXT: i32.store16 2($0), $pop62 +; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9 +; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17 +; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69 +; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOSIMD-NEXT: i32.store16 0($0), $pop71 +; NOSIMD-NEXT: return + %fma = call contract <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c) + ret <8 x half> %fma +} + +define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; RELAXED-LABEL: fmuladd_8xf16: +; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_8xf16: +; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2 +; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fmuladd_8xf16: +; NOFP16: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $16 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $8 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $24 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOFP16-NEXT: 
i32.store16 14($0), $pop8 +; NOFP16-NEXT: call $push9=, __truncsfhf2, $15 +; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOFP16-NEXT: call $push11=, __truncsfhf2, $7 +; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOFP16-NEXT: call $push14=, __truncsfhf2, $23 +; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15 +; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOFP16-NEXT: i32.store16 12($0), $pop17 +; NOFP16-NEXT: call $push18=, __truncsfhf2, $14 +; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOFP16-NEXT: call $push20=, __truncsfhf2, $6 +; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOFP16-NEXT: call $push23=, __truncsfhf2, $22 +; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24 +; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOFP16-NEXT: i32.store16 10($0), $pop26 +; NOFP16-NEXT: call $push27=, __truncsfhf2, $13 +; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOFP16-NEXT: call $push29=, __truncsfhf2, $5 +; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOFP16-NEXT: call $push32=, __truncsfhf2, $21 +; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33 +; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOFP16-NEXT: i32.store16 8($0), $pop35 +; NOFP16-NEXT: call $push36=, __truncsfhf2, $12 +; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOFP16-NEXT: call $push38=, __truncsfhf2, $4 +; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOFP16-NEXT: call $push41=, __truncsfhf2, $20 +; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42 +; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOFP16-NEXT: i32.store16 6($0), $pop44 +; NOFP16-NEXT: call $push45=, __truncsfhf2, $11 +; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOFP16-NEXT: call $push47=, __truncsfhf2, $3 +; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOFP16-NEXT: call $push50=, __truncsfhf2, $19 +; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51 +; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOFP16-NEXT: i32.store16 4($0), $pop53 +; NOFP16-NEXT: call $push54=, __truncsfhf2, $10 +; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOFP16-NEXT: call $push56=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOFP16-NEXT: call $push59=, __truncsfhf2, $18 +; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60 +; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOFP16-NEXT: i32.store16 2($0), $pop62 +; NOFP16-NEXT: call $push63=, __truncsfhf2, $9 +; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOFP16-NEXT: call $push65=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOFP16-NEXT: call $push68=, __truncsfhf2, $17 +; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69 +; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOFP16-NEXT: i32.store16 0($0), $pop71 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: 
fmuladd_8xf16: +; NOSIMD: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOSIMD-NEXT: i32.store16 14($0), $pop8 +; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15 +; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7 +; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23 +; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15 +; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOSIMD-NEXT: i32.store16 12($0), $pop17 +; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14 +; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6 +; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22 +; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24 +; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOSIMD-NEXT: i32.store16 10($0), $pop26 +; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13 +; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5 +; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21 +; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33 +; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOSIMD-NEXT: i32.store16 8($0), $pop35 +; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12 +; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4 +; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20 +; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42 +; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOSIMD-NEXT: i32.store16 6($0), $pop44 +; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11 +; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3 +; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19 +; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51 +; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOSIMD-NEXT: i32.store16 4($0), $pop53 +; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10 +; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18 +; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOSIMD-NEXT: f32.add 
$push61=, $pop58, $pop60 +; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOSIMD-NEXT: i32.store16 2($0), $pop62 +; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9 +; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17 +; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69 +; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOSIMD-NEXT: i32.store16 0($0), $pop71 +; NOSIMD-NEXT: return + %fma = call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c) + ret <8 x half> %fma +} + define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fmuladd_contract_4xf32: ; RELAXED: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1 +; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fmuladd_contract_4xf32: @@ -94,18 +1028,40 @@ define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x ; STRICT-NEXT: f32x4.mul $push0=, $0, $1 ; STRICT-NEXT: f32x4.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_contract_4xf32: +; NOFP16: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $0, $1 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_contract_4xf32: +; NOSIMD: .functype fmuladd_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $4, $8 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $3, $7 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $11 +; NOSIMD-NEXT: f32.store 8($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $2, $6 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $1, $5 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop7 +; NOSIMD-NEXT: return %fma = call contract <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %fma } -; TODO: This should also have relaxed_madd in RELAXED case define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fmuladd_4xf32: ; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.mul $push0=, $0, $1 -; RELAXED-NEXT: f32x4.add $push1=, $pop0, $2 -; RELAXED-NEXT: return $pop1 +; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fmuladd_4xf32: ; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) @@ -113,10 +1069,170 @@ define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c ; STRICT-NEXT: f32x4.mul $push0=, $0, $1 ; STRICT-NEXT: f32x4.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_4xf32: +; NOFP16: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $0, $1 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_4xf32: +; NOSIMD: .functype fmuladd_4xf32 (i32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $4, $8 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $3, $7 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $11 +; NOSIMD-NEXT: f32.store 8($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $2, $6 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $1, $5 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop7 +; NOSIMD-NEXT: return %fma = call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %fma } +define <8 x float> @fmuladd_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) { +; RELAXED-LABEL: fmuladd_8xf32: +; RELAXED: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.mul $push0=, $2, $4 +; RELAXED-NEXT: f32x4.add $push1=, $pop0, $6 +; RELAXED-NEXT: v128.store 16($0), $pop1 +; RELAXED-NEXT: f32x4.mul $push2=, $1, $3 +; RELAXED-NEXT: f32x4.add $push3=, $pop2, $5 +; RELAXED-NEXT: v128.store 0($0), $pop3 +; RELAXED-NEXT: return +; +; STRICT-LABEL: fmuladd_8xf32: +; STRICT: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $2, $4 +; STRICT-NEXT: f32x4.add $push1=, $pop0, $6 +; STRICT-NEXT: v128.store 16($0), $pop1 +; STRICT-NEXT: f32x4.mul $push2=, $1, $3 +; STRICT-NEXT: f32x4.add $push3=, $pop2, $5 +; STRICT-NEXT: v128.store 0($0), $pop3 +; STRICT-NEXT: return +; +; NOFP16-LABEL: fmuladd_8xf32: +; NOFP16: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $2, $4 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $6 +; NOFP16-NEXT: v128.store 16($0), $pop1 +; NOFP16-NEXT: f32x4.mul $push2=, $1, $3 +; NOFP16-NEXT: f32x4.add $push3=, $pop2, $5 +; NOFP16-NEXT: v128.store 0($0), $pop3 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: fmuladd_8xf32: +; NOSIMD: .functype fmuladd_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $8, $16 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $24 +; NOSIMD-NEXT: f32.store 28($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $7, $15 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $23 +; NOSIMD-NEXT: f32.store 24($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $6, $14 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $22 +; NOSIMD-NEXT: f32.store 20($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $5, $13 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $21 +; NOSIMD-NEXT: f32.store 16($0), $pop7 +; NOSIMD-NEXT: f32.mul $push8=, $4, $12 +; NOSIMD-NEXT: f32.add $push9=, $pop8, $20 +; NOSIMD-NEXT: f32.store 12($0), $pop9 +; NOSIMD-NEXT: f32.mul $push10=, $3, $11 +; NOSIMD-NEXT: f32.add $push11=, $pop10, $19 +; NOSIMD-NEXT: f32.store 8($0), $pop11 +; NOSIMD-NEXT: f32.mul $push12=, $2, $10 +; NOSIMD-NEXT: f32.add $push13=, $pop12, $18 +; NOSIMD-NEXT: f32.store 4($0), $pop13 +; NOSIMD-NEXT: f32.mul $push14=, $1, $9 +; NOSIMD-NEXT: f32.add $push15=, $pop14, $17 +; NOSIMD-NEXT: f32.store 0($0), $pop15 +; NOSIMD-NEXT: return + %fma = call <8 x float> @llvm.fmuladd(<8 x float> %a, <8 x float> %b, <8 x float> %c) + ret <8 x float> %fma +} + +define <2 x double> @fmuladd_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; 
RELAXED-LABEL: fmuladd_contract_2xf64: +; RELAXED: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_contract_2xf64: +; STRICT: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64x2.mul $push0=, $0, $1 +; STRICT-NEXT: f64x2.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_contract_2xf64: +; NOFP16: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64x2.mul $push0=, $0, $1 +; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_contract_2xf64: +; NOSIMD: .functype fmuladd_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $2, $4 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $6 +; NOSIMD-NEXT: f64.store 8($0), $pop1 +; NOSIMD-NEXT: f64.mul $push2=, $1, $3 +; NOSIMD-NEXT: f64.add $push3=, $pop2, $5 +; NOSIMD-NEXT: f64.store 0($0), $pop3 +; NOSIMD-NEXT: return + %fma = call contract <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c) + ret <2 x double> %fma +} + +define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fmuladd_2xf64: +; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_2xf64: +; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64x2.mul $push0=, $0, $1 +; STRICT-NEXT: f64x2.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_2xf64: +; NOFP16: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64x2.mul $push0=, $0, $1 +; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_2xf64: +; NOSIMD: .functype fmuladd_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $2, $4 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $6 +; NOSIMD-NEXT: f64.store 8($0), $pop1 +; NOSIMD-NEXT: f64.mul $push2=, $1, $3 +; NOSIMD-NEXT: f64.add $push3=, $pop2, $5 +; NOSIMD-NEXT: f64.store 0($0), $pop3 +; NOSIMD-NEXT: return + %fma = call <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c) + ret <2 x double> %fma +} + define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fma_4xf32: ; RELAXED: .functype fma_4xf32 (v128, v128, v128) -> (v128) @@ -167,6 +1283,44 @@ define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; STRICT-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15 ; STRICT-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18 ; STRICT-NEXT: return $pop19 +; +; NOFP16-LABEL: fma_4xf32: +; NOFP16: .functype fma_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.extract_lane $push2=, $0, 0 +; NOFP16-NEXT: f32x4.extract_lane $push1=, $1, 0 +; NOFP16-NEXT: f32x4.extract_lane $push0=, $2, 0 +; NOFP16-NEXT: call $push3=, fmaf, $pop2, $pop1, $pop0 +; NOFP16-NEXT: f32x4.splat $push4=, $pop3 +; NOFP16-NEXT: f32x4.extract_lane $push7=, $0, 1 +; NOFP16-NEXT: f32x4.extract_lane $push6=, $1, 1 +; NOFP16-NEXT: f32x4.extract_lane $push5=, $2, 1 +; NOFP16-NEXT: call 
$push8=, fmaf, $pop7, $pop6, $pop5 +; NOFP16-NEXT: f32x4.replace_lane $push9=, $pop4, 1, $pop8 +; NOFP16-NEXT: f32x4.extract_lane $push12=, $0, 2 +; NOFP16-NEXT: f32x4.extract_lane $push11=, $1, 2 +; NOFP16-NEXT: f32x4.extract_lane $push10=, $2, 2 +; NOFP16-NEXT: call $push13=, fmaf, $pop12, $pop11, $pop10 +; NOFP16-NEXT: f32x4.replace_lane $push14=, $pop9, 2, $pop13 +; NOFP16-NEXT: f32x4.extract_lane $push17=, $0, 3 +; NOFP16-NEXT: f32x4.extract_lane $push16=, $1, 3 +; NOFP16-NEXT: f32x4.extract_lane $push15=, $2, 3 +; NOFP16-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15 +; NOFP16-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18 +; NOFP16-NEXT: return $pop19 +; +; NOSIMD-LABEL: fma_4xf32: +; NOSIMD: .functype fma_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, fmaf, $4, $8, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop0 +; NOSIMD-NEXT: call $push1=, fmaf, $3, $7, $11 +; NOSIMD-NEXT: f32.store 8($0), $pop1 +; NOSIMD-NEXT: call $push2=, fmaf, $2, $6, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop2 +; NOSIMD-NEXT: call $push3=, fmaf, $1, $5, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop3 +; NOSIMD-NEXT: return %fma = call <4 x float> @llvm.fma(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %fma } @@ -176,9 +1330,9 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 ; RELAXED-LABEL: fadd_fmul_contract_8xf32: ; RELAXED: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $6, $4, $2 +; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $4, $2, $6 ; RELAXED-NEXT: v128.store 16($0), $pop0 -; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $5, $3, $1 +; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $3, $1, $5 ; RELAXED-NEXT: v128.store 0($0), $pop1 ; RELAXED-NEXT: return ; @@ -192,17 +1346,56 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 ; STRICT-NEXT: f32x4.add $push3=, $pop2, $5 ; STRICT-NEXT: v128.store 0($0), $pop3 ; STRICT-NEXT: return +; +; NOFP16-LABEL: fadd_fmul_contract_8xf32: +; NOFP16: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $4, $2 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $6 +; NOFP16-NEXT: v128.store 16($0), $pop1 +; NOFP16-NEXT: f32x4.mul $push2=, $3, $1 +; NOFP16-NEXT: f32x4.add $push3=, $pop2, $5 +; NOFP16-NEXT: v128.store 0($0), $pop3 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: fadd_fmul_contract_8xf32: +; NOSIMD: .functype fadd_fmul_contract_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $16, $8 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $24 +; NOSIMD-NEXT: f32.store 28($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $15, $7 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $23 +; NOSIMD-NEXT: f32.store 24($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $14, $6 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $22 +; NOSIMD-NEXT: f32.store 20($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $13, $5 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $21 +; NOSIMD-NEXT: f32.store 16($0), $pop7 +; NOSIMD-NEXT: f32.mul $push8=, $12, $4 +; NOSIMD-NEXT: f32.add $push9=, $pop8, $20 +; NOSIMD-NEXT: f32.store 12($0), $pop9 +; NOSIMD-NEXT: f32.mul $push10=, $11, $3 +; NOSIMD-NEXT: f32.add $push11=, $pop10, $19 +; NOSIMD-NEXT: f32.store 8($0), 
$pop11 +; NOSIMD-NEXT: f32.mul $push12=, $10, $2 +; NOSIMD-NEXT: f32.add $push13=, $pop12, $18 +; NOSIMD-NEXT: f32.store 4($0), $pop13 +; NOSIMD-NEXT: f32.mul $push14=, $9, $1 +; NOSIMD-NEXT: f32.add $push15=, $pop14, $17 +; NOSIMD-NEXT: f32.store 0($0), $pop15 +; NOSIMD-NEXT: return %mul = fmul contract <8 x float> %b, %a %add = fadd contract <8 x float> %mul, %c ret <8 x float> %add } - define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { ; RELAXED-LABEL: fadd_fmul_contract_2xf64: ; RELAXED: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $2, $1, $0 +; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fadd_fmul_contract_2xf64: @@ -211,28 +1404,64 @@ define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, ; STRICT-NEXT: f64x2.mul $push0=, $1, $0 ; STRICT-NEXT: f64x2.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_contract_2xf64: +; NOFP16: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64x2.mul $push0=, $1, $0 +; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_contract_2xf64: +; NOSIMD: .functype fadd_fmul_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $4, $2 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $6 +; NOSIMD-NEXT: f64.store 8($0), $pop1 +; NOSIMD-NEXT: f64.mul $push2=, $3, $1 +; NOSIMD-NEXT: f64.add $push3=, $pop2, $5 +; NOSIMD-NEXT: f64.store 0($0), $pop3 +; NOSIMD-NEXT: return %mul = fmul contract <2 x double> %b, %a %add = fadd contract <2 x double> %mul, %c ret <2 x double> %add } -define float @fadd_fmul_contract_f32(float %a, float %b, float %c) { -; RELAXED-LABEL: fadd_fmul_contract_f32: -; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +define <2 x double> @fadd_fmul_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fadd_fmul_2xf64: +; RELAXED: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32.mul $push0=, $1, $0 -; RELAXED-NEXT: f32.add $push1=, $pop0, $2 +; RELAXED-NEXT: f64x2.mul $push0=, $1, $0 +; RELAXED-NEXT: f64x2.add $push1=, $pop0, $2 ; RELAXED-NEXT: return $pop1 ; -; STRICT-LABEL: fadd_fmul_contract_f32: -; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; STRICT-LABEL: fadd_fmul_2xf64: +; STRICT: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128) ; STRICT-NEXT: # %bb.0: -; STRICT-NEXT: f32.mul $push0=, $1, $0 -; STRICT-NEXT: f32.add $push1=, $pop0, $2 +; STRICT-NEXT: f64x2.mul $push0=, $1, $0 +; STRICT-NEXT: f64x2.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 - %mul = fmul contract float %b, %a - %add = fadd contract float %mul, %c - ret float %add +; +; NOFP16-LABEL: fadd_fmul_2xf64: +; NOFP16: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64x2.mul $push0=, $1, $0 +; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_2xf64: +; NOSIMD: .functype fadd_fmul_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $4, $2 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $6 +; NOSIMD-NEXT: f64.store 8($0), $pop1 +; NOSIMD-NEXT: f64.mul $push2=, $3, $1 +; NOSIMD-NEXT: f64.add $push3=, $pop2, $5 +; 
NOSIMD-NEXT: f64.store 0($0), $pop3 +; NOSIMD-NEXT: return + %mul = fmul <2 x double> %b, %a + %add = fadd <2 x double> %mul, %c + ret <2 x double> %add } define float @fma_f32(float %a, float %b, float %c) { @@ -247,6 +1476,18 @@ define float @fma_f32(float %a, float %b, float %c) { ; STRICT-NEXT: # %bb.0: ; STRICT-NEXT: call $push0=, fmaf, $0, $1, $2 ; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fma_f32: +; NOFP16: .functype fma_f32 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, fmaf, $0, $1, $2 +; NOFP16-NEXT: return $pop0 +; +; NOSIMD-LABEL: fma_f32: +; NOSIMD: .functype fma_f32 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, fmaf, $0, $1, $2 +; NOSIMD-NEXT: return $pop0 %fma = call float @llvm.fma(float %a, float %b, float %c) ret float %fma } @@ -263,6 +1504,18 @@ define double @fma_f64(double %a, double %b, double %c) { ; STRICT-NEXT: # %bb.0: ; STRICT-NEXT: call $push0=, fma, $0, $1, $2 ; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fma_f64: +; NOFP16: .functype fma_f64 (f64, f64, f64) -> (f64) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, fma, $0, $1, $2 +; NOFP16-NEXT: return $pop0 +; +; NOSIMD-LABEL: fma_f64: +; NOSIMD: .functype fma_f64 (f64, f64, f64) -> (f64) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, fma, $0, $1, $2 +; NOSIMD-NEXT: return $pop0 %fma = call double @llvm.fma(double %a, double %b, double %c) ret double %fma } diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll index 6e2d860..b90c1da 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll @@ -27,7 +27,7 @@ define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 ; RELAXED-LABEL: fsub_fmul_contract_4xf32: ; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fsub_fmul_contract_4xf32: @@ -46,15 +46,14 @@ define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h ; RELAXED-LABEL: fsub_fmul_contract_8xf16: ; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: f16x8.nmadd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fsub_fmul_contract_8xf16: ; STRICT: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128) ; STRICT-NEXT: # %bb.0: -; STRICT-NEXT: f16x8.mul $push0=, $1, $0 -; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0 -; STRICT-NEXT: return $pop1 +; STRICT-NEXT: f16x8.nmadd $push0=, $1, $0, $2 +; STRICT-NEXT: return $pop0 %mul = fmul contract <8 x half> %b, %a %sub = fsub contract <8 x half> %c, %mul ret <8 x half> %sub @@ -84,9 +83,9 @@ define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 ; RELAXED-LABEL: fsub_fmul_contract_8xf32: ; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2 +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $4, $2, $6 ; RELAXED-NEXT: v128.store 16($0), $pop0 -; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1 +; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $3, $1, $5 ; RELAXED-NEXT: v128.store 0($0), $pop1 ; RELAXED-NEXT: return ; @@ 
-110,7 +109,7 @@ define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, ; RELAXED-LABEL: fsub_fmul_contract_2xf64: ; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fsub_fmul_contract_2xf64: @@ -143,3 +142,55 @@ define float @fsub_fmul_contract_f32(float %a, float %b, float %c) { ret float %sub } +define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; RELAXED-LABEL: fmuladd_8xf16: +; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f16x8.nmadd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_8xf16: +; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f16x8.nmadd $push0=, $0, $1, $2 +; STRICT-NEXT: return $pop0 + %fneg = fneg <8 x half> %a + %fma = call <8 x half> @llvm.fmuladd(<8 x half> %fneg, <8 x half> %b, <8 x half> %c) + ret <8 x half> %fma +} + +define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; RELAXED-LABEL: fmuladd_4xf32: +; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_4xf32: +; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $0, $1 +; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %fneg = fneg <4 x float> %a + %fma = call <4 x float> @llvm.fmuladd(<4 x float> %fneg, <4 x float> %b, <4 x float> %c) + ret <4 x float> %fma +} + +define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fmuladd_2xf64: +; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_2xf64: +; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64x2.mul $push0=, $0, $1 +; STRICT-NEXT: f64x2.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %fneg = fneg <2 x double> %a + %fma = call <2 x double> @llvm.fmuladd(<2 x double> %fneg, <2 x double> %b, <2 x double> %c) + ret <2 x double> %fma +} diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll index a0c243b..f3950b7 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll @@ -1,16 +1,15 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -;; A minimal test case. llc will crash if global variables already has a section -;; prefix. Subsequent PRs will expand on this test case to test the hotness -;; reconcillation implementation. - -; RUN: not llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ +;; A minimal test case. Subsequent PRs will expand on this test case +;; (e.g., with more functions, variables and profiles) and test the hotness +;; reconciliation implementation.
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ ; RUN: -partition-static-data-sections=true \ ; RUN: -data-sections=true -unique-section-names=false \ -; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=ERR +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=IR -; ERR: Global variable hot_bss already has a section prefix hot +; IR: .section .bss.hot.,"aw" @hot_bss = internal global i32 0, !section_prefix !17 diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll index ce06d17..604b4fd 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -106,23 +106,31 @@ target triple = "x86_64-unknown-linux-gnu" ; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,8 ; AGG-NEXT: .section .data.unlikely.,"aw",@progbits +;; The `.section` directive is omitted for .data with -unique-section-names=false. +; See MCSectionELF::shouldOmitSectionDirective for the implementation details. + ; For @data_with_unknown_hotness ; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness ; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits ; UNIQ: .section .data,"aw",@progbits,unique,9 -; The `.section` directive is omitted for .data with -unique-section-names=false. -; See MCSectionELF::shouldOmitSectionDirective for the implementation details. + ; AGG: .data ; COMMON: .Ldata_with_unknown_hotness: -; For @hot_data_custom_bar_section -; It has an explicit section attribute 'var' and shouldn't have hot or unlikely suffix. +; For variables that are not eligible for section prefix annotation ; COMMON: .type hot_data_custom_bar_section,@object ; SYM-NEXT: .section bar,"aw",@progbits ; SYM: hot_data_custom_bar_section ; UNIQ: .section bar,"aw",@progbits ; AGG: .section bar,"aw",@progbits +; SYM: .section .data.llvm.fake_var,"aw" +; UNIQ: .section .data,"aw" +; AGG: .data + +;; No section for linker declaration +; COMMON-NOT: qux + @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 @hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3] @@ -137,6 +145,8 @@ target triple = "x86_64-unknown-linux-gnu" @data3 = internal global i32 3 @data_with_unknown_hotness = private global i32 5 @hot_data_custom_bar_section = internal global i32 101 #0 +@llvm.fake_var = internal global i32 123 +@qux = external global i64 define void @cold_func(i32 %0) !prof !15 { %2 = load i32, ptr @cold_bss diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll index 5aa266d..69abf6e 100644 --- a/llvm/test/CodeGen/X86/setcc-wide-types.ll +++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -1447,3 +1447,158 @@ define i1 @eq_i512_load_arg(ptr%p, i512 %b) { %r = icmp eq i512 %a, %b ret i1 %r } + +; Tests for any/allbits from memory. 
+ +define i1 @anybits_i128_load_arg(ptr %w) { +; ANY-LABEL: anybits_i128_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq (%rdi), %rax +; ANY-NEXT: orq 8(%rdi), %rax +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i128, ptr %w + %cmp = icmp ne i128 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i128_load_arg(ptr %w) { +; SSE2-LABEL: allbits_i128_load_arg: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pcmpeqb (%rdi), %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE41-LABEL: allbits_i128_load_arg: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa (%rdi), %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: ptest %xmm1, %xmm0 +; SSE41-NEXT: setb %al +; SSE41-NEXT: retq +; +; AVXANY-LABEL: allbits_i128_load_arg: +; AVXANY: # %bb.0: +; AVXANY-NEXT: vmovdqa (%rdi), %xmm0 +; AVXANY-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVXANY-NEXT: vptest %xmm1, %xmm0 +; AVXANY-NEXT: setb %al +; AVXANY-NEXT: retq + %ld = load i128, ptr %w + %cmp = icmp eq i128 %ld, -1 + ret i1 %cmp +} + +define i1 @anybits_i256_load_arg(ptr %w) { +; ANY-LABEL: anybits_i256_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq (%rdi), %rax +; ANY-NEXT: movq 8(%rdi), %rcx +; ANY-NEXT: orq 24(%rdi), %rcx +; ANY-NEXT: orq 16(%rdi), %rax +; ANY-NEXT: orq %rcx, %rax +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i256, ptr %w + %cmp = icmp ne i256 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i256_load_arg(ptr %w) { +; SSE-LABEL: allbits_i256_load_arg: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: andq 24(%rdi), %rcx +; SSE-NEXT: andq 16(%rdi), %rax +; SSE-NEXT: andq %rcx, %rax +; SSE-NEXT: cmpq $-1, %rax +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1-LABEL: allbits_i256_load_arg: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vptest %ymm1, %ymm0 +; AVX1-NEXT: setb %al +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: allbits_i256_load_arg: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vptest %ymm1, %ymm0 +; AVX2-NEXT: setb %al +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: allbits_i256_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vptest %ymm1, %ymm0 +; AVX512-NEXT: setb %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i256, ptr %w + %cmp = icmp eq i256 %ld, -1 + ret i1 %cmp +} + +define i1 @anybits_i512_load_arg(ptr %w) { +; ANY-LABEL: anybits_i512_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq 16(%rdi), %rax +; ANY-NEXT: movq (%rdi), %rcx +; ANY-NEXT: movq 8(%rdi), %rdx +; ANY-NEXT: movq 24(%rdi), %rsi +; ANY-NEXT: orq 56(%rdi), %rsi +; ANY-NEXT: orq 40(%rdi), %rdx +; ANY-NEXT: orq %rsi, %rdx +; ANY-NEXT: orq 48(%rdi), %rax +; ANY-NEXT: orq 32(%rdi), %rcx +; ANY-NEXT: orq %rax, %rcx +; ANY-NEXT: orq %rdx, %rcx +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i512, ptr %w + %cmp = icmp ne i512 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i512_load_arg(ptr %w) { +; NO512-LABEL: allbits_i512_load_arg: +; NO512: # %bb.0: +; NO512-NEXT: movq 16(%rdi), %rax +; NO512-NEXT: movq (%rdi), %rcx +; NO512-NEXT: movq 8(%rdi), %rdx +; NO512-NEXT: movq 24(%rdi), %rsi +; NO512-NEXT: andq 56(%rdi), %rsi +; NO512-NEXT: andq 40(%rdi), %rdx +; NO512-NEXT: andq %rsi, %rdx +; NO512-NEXT: andq 48(%rdi), 
%rax +; NO512-NEXT: andq 32(%rdi), %rcx +; NO512-NEXT: andq %rax, %rcx +; NO512-NEXT: andq %rdx, %rcx +; NO512-NEXT: cmpq $-1, %rcx +; NO512-NEXT: sete %al +; NO512-NEXT: retq +; +; AVX512-LABEL: allbits_i512_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1 +; AVX512-NEXT: vpcmpneqd (%rdi), %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: sete %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i512, ptr %w + %cmp = icmp eq i512 %ld, -1 + ret i1 %cmp +} diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 48aec4b..57da338 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -917,11 +917,11 @@ main: # CHECK: f16x8.nearest # encoding: [0xfd,0xb6,0x02] f16x8.nearest - # CHECK: f16x8.relaxed_madd # encoding: [0xfd,0xce,0x02] - f16x8.relaxed_madd + # CHECK: f16x8.madd # encoding: [0xfd,0xce,0x02] + f16x8.madd - # CHECK: f16x8.relaxed_nmadd # encoding: [0xfd,0xcf,0x02] - f16x8.relaxed_nmadd + # CHECK: f16x8.nmadd # encoding: [0xfd,0xcf,0x02] + f16x8.nmadd # CHECK: i16x8.trunc_sat_f16x8_s # encoding: [0xfd,0xc5,0x02] i16x8.trunc_sat_f16x8_s diff --git a/llvm/test/TableGen/listsplat.td b/llvm/test/TableGen/listsplat.td index 5a93a4c..43803d6 100644 --- a/llvm/test/TableGen/listsplat.td +++ b/llvm/test/TableGen/listsplat.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s // CHECK: ------------- Classes ----------------- // CHECK-NEXT: class X<int X:a = ?, int X:b = ?> { @@ -73,3 +74,8 @@ def DYa1 : Y<"a", 1>; def DYa2 : Y<"a", 2>; def DZ : X<42, !size([1, 2, 3])>; + +#ifdef ERROR1 +// ERROR1: !listsplat count -1 is negative +defvar E = !listsplat("", -1); +#endif diff --git a/llvm/test/Transforms/PGOProfile/data-access-profile.ll b/llvm/test/Transforms/PGOProfile/data-access-profile.ll index 29198f34..205184b 100644 --- a/llvm/test/Transforms/PGOProfile/data-access-profile.ll +++ b/llvm/test/Transforms/PGOProfile/data-access-profile.ll @@ -3,55 +3,72 @@ ; RUN: rm -rf %t && split-file %s %t && cd %t -;; Read a text profile and merge it into indexed profile. +;; Read text profiles and merge them into indexed profiles. ; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata +; RUN: llvm-profdata merge --memprof-version=4 memprof-no-dap.yaml -o memprof-no-dap.profdata ;; Run optimizer pass on an IR module without IR functions, and test that global ;; variables in the module could be annotated (i.e., no early return). ; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \ -; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT +; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR,STAT ;; Run optimizer pass on the IR, and check the section prefix. ; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \ -; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT +; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR,STAT -;; Run optimizer pass without explicitly setting -memprof-annotate-static-data-prefix. -;; The output text IR shouldn't have `section_prefix` +;; Run memprof with a profile that contains no data access profiles.
Test that the IR has the module flag +;; `EnableDataAccessProf` set to 0. +; RUN: opt -passes='memprof-use<profile-filename=memprof-no-dap.profdata>' -memprof-annotate-static-data-prefix \ +; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefix=FLAG + +;; Run memprof without explicitly setting -memprof-annotate-static-data-prefix. +;; The output text IR shouldn't have `section_prefix` or the `EnableDataAccessProf` module flag. ; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' \ -; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --implicit-check-not="section_prefix" +; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --check-prefix=FLAGLESS --implicit-check-not="section_prefix" ; LOG: Skip annotating string literal .str ; LOG: Global variable var1 is annotated as hot ; LOG: Global variable var2.llvm.125 is annotated as hot ; LOG: Global variable bar is not annotated ; LOG: Global variable foo is annotated as unlikely -; LOG: Global variable var3 has explicit section name. Skip annotating. -; LOG: Global variable var4 has explicit section name. Skip annotating. +; LOG: Skip annotation for var3 due to explicit section name. +; LOG: Skip annotation for var4 due to explicit section name. +; LOG: Skip annotation for llvm.fake_var due to name starts with `llvm.`. +; LOG: Skip annotation for qux due to linker declaration. ;; String literals are not annotated. -; PREFIX: @.str = unnamed_addr constant [5 x i8] c"abcde" -; PREFIX-NOT: section_prefix -; PREFIX: @var1 = global i32 123, !section_prefix !0 +; IR: @.str = unnamed_addr constant [5 x i8] c"abcde" +; IR-NOT: section_prefix +; IR: @var1 = global i32 123, !section_prefix !0 ;; @var.llvm.125 will be canonicalized to @var2 for profile look-up. -; PREFIX-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0 +; IR-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0 ;; @bar is not seen in hot symbol or known symbol set, so it won't get a section ;; prefix. Test this by testing that there is no section_prefix between @bar and ;; @foo. -; PREFIX-NEXT: @bar = global i16 3 -; PREFIX-NOT: !section_prefix +; IR-NEXT: @bar = global i16 3 +; IR-NOT: !section_prefix ;; @foo is unlikely. -; PREFIX-NEXT: @foo = global i8 2, !section_prefix !1 +; IR-NEXT: @foo = global i8 2, !section_prefix !1 + +; IR-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1" +; IR-NEXT: @var4 = constant [1 x i64] [i64 98765] #0 + +; IR: @llvm.fake_var = global i32 123 +; IR-NOT: !section_prefix +; IR: @qux = external global i64 +; IR-NOT: !section_prefix -; PREFIX-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1" -; PREFIX-NEXT: @var4 = constant [1 x i64] [i64 98765] #0 -; PREFIX: attributes #0 = { "rodata-section"="sec2" } +; IR: attributes #0 = { "rodata-section"="sec2" } -; PREFIX: !0 = !{!"section_prefix", !"hot"} -; PREFIX-NEXT: !1 = !{!"section_prefix", !"unlikely"} +; IR: !0 = !{!"section_prefix", !"hot"} +; IR-NEXT: !1 = !{!"section_prefix", !"unlikely"} +; IR-NEXT: !2 = !{i32 2, !"EnableDataAccessProf", i32 1} + +; FLAG: !{i32 2, !"EnableDataAccessProf", i32 0} +; FLAGLESS-NOT: EnableDataAccessProf ; STAT: 1 memprof - Number of global vars annotated with 'unlikely' section prefix. ; STAT: 2 memprof - Number of global vars with user-specified section (not annotated). @@ -72,6 +89,24 @@ DataAccessProfiles: - foo KnownColdStrHashes: [ 999, 1001 ] ... +;--- memprof-no-dap.yaml +--- +# A memprof file without data access profiles.
The heap records are simplified +# to pass profile parsing and don't need to match the IR. +HeapProfileRecords: + - GUID: 0xdeadbeef12345678 + AllocSites: + - Callstack: + - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true } + MemInfoBlock: + AllocCount: 111 + TotalSize: 222 + TotalLifetime: 333 + TotalLifetimeAccessDensity: 444 + CallSites: + - Frames: + - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true } +... ;--- input.ll target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" @@ -84,11 +119,14 @@ target triple = "x86_64-unknown-linux-gnu" @foo = global i8 2 @var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1" @var4 = constant [1 x i64][i64 98765] #0 +@llvm.fake_var = global i32 123 +@qux = external global i64 define i32 @func() { %a = load i32, ptr @var1 %b = load i32, ptr @var2.llvm.125 - %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b) + %c = load i32, ptr @llvm.fake_var + %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c) ret i32 %ret } @@ -108,5 +146,8 @@ target triple = "x86_64-unknown-linux-gnu" @foo = global i8 2 @var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1" @var4 = constant [1 x i64][i64 98765] #0 +@llvm.fake_var = global i32 123 +@qux = external global i64 + attributes #0 = { "rodata-section"="sec2" } diff --git a/llvm/unittests/ADT/BitFieldsTest.cpp b/llvm/unittests/ADT/BitFieldsTest.cpp index 3062d5d..ae541fe 100644 --- a/llvm/unittests/ADT/BitFieldsTest.cpp +++ b/llvm/unittests/ADT/BitFieldsTest.cpp @@ -247,8 +247,8 @@ TEST(BitfieldsTest, ValueTooBigBounded) { Bitfield::set<A>(Storage, 0); Bitfield::set<A>(Storage, -1); Bitfield::set<A>(Storage, -2); - EXPECT_DEBUG_DEATH(Bitfield::set<A>(Storage, 2), "value is too big"); - EXPECT_DEBUG_DEATH(Bitfield::set<A>(Storage, -3), "value is too small"); + EXPECT_DEBUG_DEATH(Bitfield::set<A>(Storage, 2), "value is out of range"); + EXPECT_DEBUG_DEATH(Bitfield::set<A>(Storage, -3), "value is out of range"); } #endif diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index 2431db9..67fee96 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -1066,6 +1066,115 @@ TEST_F(ConstantFPRangeTest, sub) { #endif } +TEST_F(ConstantFPRangeTest, mul) { + EXPECT_EQ(Full.mul(Full), NonNaN.unionWith(QNaN)); + EXPECT_EQ(Full.mul(Empty), Empty); + EXPECT_EQ(Empty.mul(Full), Empty); + EXPECT_EQ(Empty.mul(Empty), Empty); + EXPECT_EQ(One.mul(One), ConstantFPRange(APFloat(1.0))); + EXPECT_EQ(Some.mul(Some), + ConstantFPRange::getNonNaN(APFloat(-9.0), APFloat(9.0))); + EXPECT_EQ(SomePos.mul(SomeNeg), + ConstantFPRange::getNonNaN(APFloat(-9.0), APFloat(-0.0))); + EXPECT_EQ(PosInf.mul(PosInf), PosInf); + EXPECT_EQ(NegInf.mul(NegInf), PosInf); + EXPECT_EQ(PosInf.mul(Finite), NonNaN.unionWith(QNaN)); + EXPECT_EQ(NegInf.mul(Finite), NonNaN.unionWith(QNaN)); + EXPECT_EQ(PosInf.mul(NegInf), NegInf); + EXPECT_EQ(NegInf.mul(PosInf), NegInf); + EXPECT_EQ(PosZero.mul(NegZero), NegZero); + EXPECT_EQ(PosZero.mul(Zero), Zero); + EXPECT_EQ(NegZero.mul(NegZero), PosZero); + EXPECT_EQ(NegZero.mul(Zero), Zero); + EXPECT_EQ(NaN.mul(NaN), QNaN); + EXPECT_EQ(NaN.mul(Finite), QNaN); + +#if defined(EXPENSIVE_CHECKS) + EnumerateTwoInterestingConstantFPRanges( + [](const ConstantFPRange &LHS, const ConstantFPRange &RHS) { + ConstantFPRange Res = LHS.mul(RHS); + ConstantFPRange Expected = + 
ConstantFPRange::getEmpty(LHS.getSemantics()); + EnumerateValuesInConstantFPRange( + LHS, + [&](const APFloat &LHSC) { + EnumerateValuesInConstantFPRange( + RHS, + [&](const APFloat &RHSC) { + APFloat Prod = LHSC * RHSC; + EXPECT_TRUE(Res.contains(Prod)) + << "Wrong result for " << LHS << " * " << RHS + << ". The result " << Res << " should contain " << Prod; + if (!Expected.contains(Prod)) + Expected = Expected.unionWith(ConstantFPRange(Prod)); + }, + /*IgnoreNaNPayload=*/true); + }, + /*IgnoreNaNPayload=*/true); + EXPECT_EQ(Res, Expected) + << "Suboptimal result for " << LHS << " * " << RHS << ". Expected " + << Expected << ", but got " << Res; + }, + SparseLevel::SpecialValuesOnly); +#endif +} + +TEST_F(ConstantFPRangeTest, div) { + EXPECT_EQ(Full.div(Full), NonNaN.unionWith(QNaN)); + EXPECT_EQ(Full.div(Empty), Empty); + EXPECT_EQ(Empty.div(Full), Empty); + EXPECT_EQ(Empty.div(Empty), Empty); + EXPECT_EQ(One.div(One), ConstantFPRange(APFloat(1.0))); + EXPECT_EQ(Some.div(Some), NonNaN.unionWith(QNaN)); + EXPECT_EQ(SomePos.div(SomeNeg), + ConstantFPRange(APFloat::getInf(Sem, /*Negative=*/true), + APFloat::getZero(Sem, /*Negative=*/true), + /*MayBeQNaN=*/true, /*MayBeSNaN=*/false)); + EXPECT_EQ(PosInf.div(PosInf), QNaN); + EXPECT_EQ(NegInf.div(NegInf), QNaN); + EXPECT_EQ(PosInf.div(Finite), NonNaN); + EXPECT_EQ(NegInf.div(Finite), NonNaN); + EXPECT_EQ(PosInf.div(NegInf), QNaN); + EXPECT_EQ(NegInf.div(PosInf), QNaN); + EXPECT_EQ(Zero.div(Zero), QNaN); + EXPECT_EQ(SomePos.div(PosInf), PosZero); + EXPECT_EQ(SomeNeg.div(PosInf), NegZero); + EXPECT_EQ(PosInf.div(SomePos), PosInf); + EXPECT_EQ(NegInf.div(SomeNeg), PosInf); + EXPECT_EQ(NegInf.div(Some), NonNaN); + EXPECT_EQ(NaN.div(NaN), QNaN); + EXPECT_EQ(NaN.div(Finite), QNaN); + +#if defined(EXPENSIVE_CHECKS) + EnumerateTwoInterestingConstantFPRanges( + [](const ConstantFPRange &LHS, const ConstantFPRange &RHS) { + ConstantFPRange Res = LHS.div(RHS); + ConstantFPRange Expected = + ConstantFPRange::getEmpty(LHS.getSemantics()); + EnumerateValuesInConstantFPRange( + LHS, + [&](const APFloat &LHSC) { + EnumerateValuesInConstantFPRange( + RHS, + [&](const APFloat &RHSC) { + APFloat Val = LHSC / RHSC; + EXPECT_TRUE(Res.contains(Val)) + << "Wrong result for " << LHS << " / " << RHS + << ". The result " << Res << " should contain " << Val; + if (!Expected.contains(Val)) + Expected = Expected.unionWith(ConstantFPRange(Val)); + }, + /*IgnoreNaNPayload=*/true); + }, + /*IgnoreNaNPayload=*/true); + EXPECT_EQ(Res, Expected) + << "Suboptimal result for " << LHS << " / " << RHS << ". 
Expected " + << Expected << ", but got " << Res; + }, + SparseLevel::SpecialValuesOnly); +#endif +} + TEST_F(ConstantFPRangeTest, flushDenormals) { const fltSemantics &FP8Sem = APFloat::Float8E4M3(); APFloat NormalVal = APFloat::getSmallestNormalized(FP8Sem); diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index bdcb8a3..343c2bb71 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -1129,6 +1129,7 @@ Transforms/LowerIFunc/ifunc-alias.ll Transforms/LowerIFunc/ifunc-nonsense-resolvers.ll Transforms/LowerIFunc/ifunc-program-addrspace.ll Transforms/LowerIFunc/lower-ifunc.ll +Transforms/LowerMatrixIntrinsics/data-layout-multiply-fused.ll Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll Transforms/LowerMatrixIntrinsics/multiply-fused.ll Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll @@ -1311,82 +1312,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll Transforms/SimpleLoopUnswitch/trivial-unswitch.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll -Transforms/SLPVectorizer/AArch64/gather-root.ll -Transforms/SLPVectorizer/AArch64/horizontal.ll -Transforms/SLPVectorizer/AArch64/loadi8.ll -Transforms/SLPVectorizer/AArch64/phi-node-bitwidt-op-not.ll -Transforms/SLPVectorizer/AArch64/uselistorder.ll -Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll -Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll -Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll -Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll -Transforms/SLPVectorizer/bool-logical-op-reduction-with-poison.ll -Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll -Transforms/SLPVectorizer/const-bool-logical-or-reduction.ll -Transforms/SLPVectorizer/extracts-with-undefs.ll -Transforms/SLPVectorizer/freeze-signedness-missed.ll -Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll -Transforms/SLPVectorizer/gather_extract_from_vectorbuild.ll -Transforms/SLPVectorizer/insert-element-build-vector-const.ll -Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll -Transforms/SLPVectorizer/insert-element-build-vector.ll -Transforms/SLPVectorizer/logical-ops-poisonous-repeated.ll -Transforms/SLPVectorizer/minbitwidth-node-with-multi-users.ll -Transforms/SLPVectorizer/minbitwidth-user-not-min.ll -Transforms/SLPVectorizer/partial-register-extract.ll -Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll -Transforms/SLPVectorizer/reorder-node.ll -Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll -Transforms/SLPVectorizer/revec.ll -Transforms/SLPVectorizer/RISCV/remarks_cmp_sel_min_max.ll -Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll -Transforms/SLPVectorizer/RISCV/reordered-interleaved-loads.ll -Transforms/SLPVectorizer/RISCV/revec.ll -Transforms/SLPVectorizer/RISCV/select-profitability.ll -Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll -Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll -Transforms/SLPVectorizer/slp-deleted-inst.ll -Transforms/SLPVectorizer/SystemZ/cmp-ptr-minmax.ll -Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll -Transforms/SLPVectorizer/SystemZ/minbitwidth-trunc.ll -Transforms/SLPVectorizer/X86/bool-mask.ll -Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll -Transforms/SLPVectorizer/X86/cmp-after-intrinsic-call-minbitwidth.ll -Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll 
-Transforms/SLPVectorizer/X86/cmp_sel.ll -Transforms/SLPVectorizer/X86/crash_7zip.ll -Transforms/SLPVectorizer/X86/crash_clear_undefs.ll -Transforms/SLPVectorizer/X86/crash_cmpop.ll -Transforms/SLPVectorizer/X86/debug-counter.ll -Transforms/SLPVectorizer/X86/debug-info-salvage.ll -Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll -Transforms/SLPVectorizer/X86/extracts-non-extendable.ll -Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll -Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll -Transforms/SLPVectorizer/X86/horizontal-minmax.ll -Transforms/SLPVectorizer/X86/insert-after-bundle.ll -Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll -Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll -Transforms/SLPVectorizer/X86/minbw-user-non-sizable.ll -Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll -Transforms/SLPVectorizer/X86/ordering-bug.ll -Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll -Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll -Transforms/SLPVectorizer/X86/pr46983.ll -Transforms/SLPVectorizer/X86/pr49933.ll -Transforms/SLPVectorizer/X86/propagate_ir_flags.ll -Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll -Transforms/SLPVectorizer/X86/reduction-logical.ll -Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll -Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll -Transforms/SLPVectorizer/X86/select-reduction-op.ll -Transforms/SLPVectorizer/X86/shrink_after_reorder.ll -Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll -Transforms/SLPVectorizer/X86/undef_vect.ll -Transforms/SLPVectorizer/X86/used-reduced-op.ll -Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll -Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll -Transforms/SLPVectorizer/X86/whole-registers-compare.ll Transforms/SROA/addrspacecast.ll Transforms/SROA/phi-and-select.ll Transforms/SROA/phi-gep.ll |