diff options
Diffstat (limited to 'llvm')
69 files changed, 4240 insertions, 896 deletions
diff --git a/llvm/benchmarks/Mustache.cpp b/llvm/benchmarks/Mustache.cpp index 6d24f54..996eca41 100644 --- a/llvm/benchmarks/Mustache.cpp +++ b/llvm/benchmarks/Mustache.cpp @@ -8,7 +8,7 @@ static const std::string LongHtmlString = [] { std::string S; S.reserve(500000); - for (int i = 0; i < 50000; ++i) { + for (int Idx = 0; Idx < 50000; ++Idx) { S += "<script>alert('xss');</script>"; } return S; @@ -153,7 +153,11 @@ static const std::string LargeOutputStringTemplate = "{{long_string}}"; // syntaxes. static void BM_Mustache_StringRendering(benchmark::State &state, const std::string &TplStr) { - llvm::mustache::Template Tpl(TplStr); + llvm::BumpPtrAllocator Allocator; + llvm::StringSaver Saver(Allocator); + llvm::mustache::MustacheContext Ctx(Allocator, Saver); + + llvm::mustache::Template Tpl(TplStr, Ctx); llvm::json::Value Data = llvm::json::Object({{"content", llvm::json::Value(LongHtmlString)}}); for (auto _ : state) { @@ -172,7 +176,11 @@ BENCHMARK_CAPTURE(BM_Mustache_StringRendering, Unescaped_Ampersand, // Tests the "hot render" cost of repeatedly traversing a deep and wide // JSON object. static void BM_Mustache_DeepTraversal(benchmark::State &state) { - llvm::mustache::Template Tpl(DeepTraversalTemplate); + llvm::BumpPtrAllocator Allocator; + llvm::StringSaver Saver(Allocator); + llvm::mustache::MustacheContext Ctx(Allocator, Saver); + + llvm::mustache::Template Tpl(DeepTraversalTemplate, Ctx); for (auto _ : state) { std::string Result; llvm::raw_string_ostream OS(Result); @@ -184,7 +192,12 @@ BENCHMARK(BM_Mustache_DeepTraversal); // Tests the "hot render" cost of pushing and popping a deep context stack. static void BM_Mustache_DeeplyNestedRendering(benchmark::State &state) { - llvm::mustache::Template Tpl(DeeplyNestedRenderingTemplate); + + llvm::BumpPtrAllocator Allocator; + llvm::StringSaver Saver(Allocator); + llvm::mustache::MustacheContext Ctx(Allocator, Saver); + + llvm::mustache::Template Tpl(DeeplyNestedRenderingTemplate, Ctx); for (auto _ : state) { std::string Result; llvm::raw_string_ostream OS(Result); @@ -197,7 +210,11 @@ BENCHMARK(BM_Mustache_DeeplyNestedRendering); // Tests the performance of the loop logic when iterating over a huge number of // items. static void BM_Mustache_HugeArrayIteration(benchmark::State &state) { - llvm::mustache::Template Tpl(HugeArrayIterationTemplate); + llvm::BumpPtrAllocator Allocator; + llvm::StringSaver Saver(Allocator); + llvm::mustache::MustacheContext Ctx(Allocator, Saver); + + llvm::mustache::Template Tpl(HugeArrayIterationTemplate, Ctx); for (auto _ : state) { std::string Result; llvm::raw_string_ostream OS(Result); @@ -209,8 +226,12 @@ BENCHMARK(BM_Mustache_HugeArrayIteration); // Tests the performance of the parser on a large, "wide" template. static void BM_Mustache_ComplexTemplateParsing(benchmark::State &state) { + llvm::BumpPtrAllocator Allocator; + llvm::StringSaver Saver(Allocator); + llvm::mustache::MustacheContext Ctx(Allocator, Saver); + for (auto _ : state) { - llvm::mustache::Template Tpl(ComplexTemplateParsingTemplate); + llvm::mustache::Template Tpl(ComplexTemplateParsingTemplate, Ctx); benchmark::DoNotOptimize(Tpl); } } @@ -218,8 +239,12 @@ BENCHMARK(BM_Mustache_ComplexTemplateParsing); // Tests the performance of the parser on a small, "deep" template. static void BM_Mustache_SmallTemplateParsing(benchmark::State &state) { + llvm::BumpPtrAllocator Allocator; + llvm::StringSaver Saver(Allocator); + llvm::mustache::MustacheContext Ctx(Allocator, Saver); + for (auto _ : state) { - llvm::mustache::Template Tpl(SmallTemplateParsingTemplate); + llvm::mustache::Template Tpl(SmallTemplateParsingTemplate, Ctx); benchmark::DoNotOptimize(Tpl); } } @@ -227,7 +252,11 @@ BENCHMARK(BM_Mustache_SmallTemplateParsing); // Tests the performance of rendering a template that includes a partial. static void BM_Mustache_PartialsRendering(benchmark::State &state) { - llvm::mustache::Template Tpl(ComplexPartialTemplate); + llvm::BumpPtrAllocator Allocator; + llvm::StringSaver Saver(Allocator); + llvm::mustache::MustacheContext Ctx(Allocator, Saver); + + llvm::mustache::Template Tpl(ComplexPartialTemplate, Ctx); Tpl.registerPartial("item_partial", ItemPartialTemplate); llvm::json::Value Data = HugeArrayData; @@ -243,7 +272,11 @@ BENCHMARK(BM_Mustache_PartialsRendering); // Tests the performance of the underlying buffer management when generating a // very large output. static void BM_Mustache_LargeOutputString(benchmark::State &state) { - llvm::mustache::Template Tpl(LargeOutputStringTemplate); + llvm::BumpPtrAllocator Allocator; + llvm::StringSaver Saver(Allocator); + llvm::mustache::MustacheContext Ctx(Allocator, Saver); + + llvm::mustache::Template Tpl(LargeOutputStringTemplate, Ctx); for (auto _ : state) { std::string Result; llvm::raw_string_ostream OS(Result); diff --git a/llvm/docs/MLGO.rst b/llvm/docs/MLGO.rst index 965a21b..bf3de11 100644 --- a/llvm/docs/MLGO.rst +++ b/llvm/docs/MLGO.rst @@ -508,7 +508,7 @@ embeddings can be computed and accessed via an ``ir2vec::Embedder`` instance. .. code-block:: c++ - const ir2vec::Embedding &FuncVector = Emb->getFunctionVector(); + ir2vec::Embedding FuncVector = Emb->getFunctionVector(); Currently, ``Embedder`` can generate embeddings at three levels: Instructions, Basic Blocks, and Functions. Appropriate getters are provided to access the diff --git a/llvm/include/llvm/ADT/Bitset.h b/llvm/include/llvm/ADT/Bitset.h index b1e539e..0dfeb20 100644 --- a/llvm/include/llvm/ADT/Bitset.h +++ b/llvm/include/llvm/ADT/Bitset.h @@ -38,14 +38,22 @@ class Bitset { static constexpr unsigned NumWords = (NumBits + BitwordBits - 1) / BitwordBits; -protected: using StorageType = std::array<BitWord, NumWords>; - -private: StorageType Bits{}; protected: - constexpr Bitset(const StorageType &B) : Bits{B} {} + constexpr Bitset(const std::array<uint64_t, (NumBits + 63) / 64> &B) { + if constexpr (sizeof(BitWord) == sizeof(uint64_t)) { + for (size_t I = 0; I != B.size(); ++I) + Bits[I] = B[I]; + } else { + for (size_t I = 0; I != B.size(); ++I) { + uint64_t Elt = B[I]; + Bits[2 * I] = static_cast<uint32_t>(Elt); + Bits[2 * I + 1] = static_cast<uint32_t>(Elt >> 32); + } + } + } public: constexpr Bitset() = default; diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h index 81409df..6bc51fe 100644 --- a/llvm/include/llvm/Analysis/IR2Vec.h +++ b/llvm/include/llvm/Analysis/IR2Vec.h @@ -533,21 +533,20 @@ protected: /// in the IR instructions to generate the vector representation. const float OpcWeight, TypeWeight, ArgWeight; - // Utility maps - these are used to store the vector representations of - // instructions, basic blocks and functions. - mutable Embedding FuncVector; - mutable BBEmbeddingsMap BBVecMap; - mutable InstEmbeddingsMap InstVecMap; - - LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab); + LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab) + : F(F), Vocab(Vocab), Dimension(Vocab.getDimension()), + OpcWeight(ir2vec::OpcWeight), TypeWeight(ir2vec::TypeWeight), + ArgWeight(ir2vec::ArgWeight) {} - /// Function to compute embeddings. It generates embeddings for all - /// the instructions and basic blocks in the function F. - void computeEmbeddings() const; + /// Function to compute embeddings. + Embedding computeEmbeddings() const; /// Function to compute the embedding for a given basic block. + Embedding computeEmbeddings(const BasicBlock &BB) const; + + /// Function to compute the embedding for a given instruction. /// Specific to the kind of embeddings being computed. - virtual void computeEmbeddings(const BasicBlock &BB) const = 0; + virtual Embedding computeEmbeddings(const Instruction &I) const = 0; public: virtual ~Embedder() = default; @@ -556,23 +555,27 @@ public: LLVM_ABI static std::unique_ptr<Embedder> create(IR2VecKind Mode, const Function &F, const Vocabulary &Vocab); - /// Returns a map containing instructions and the corresponding embeddings for - /// the function F if it has been computed. If not, it computes the embeddings - /// for the function and returns the map. - LLVM_ABI const InstEmbeddingsMap &getInstVecMap() const; - - /// Returns a map containing basic block and the corresponding embeddings for - /// the function F if it has been computed. If not, it computes the embeddings - /// for the function and returns the map. - LLVM_ABI const BBEmbeddingsMap &getBBVecMap() const; + /// Computes and returns the embedding for a given instruction in the function + /// F + LLVM_ABI Embedding getInstVector(const Instruction &I) const { + return computeEmbeddings(I); + } - /// Returns the embedding for a given basic block in the function F if it has - /// been computed. If not, it computes the embedding for the basic block and - /// returns it. - LLVM_ABI const Embedding &getBBVector(const BasicBlock &BB) const; + /// Computes and returns the embedding for a given basic block in the function + /// F + LLVM_ABI Embedding getBBVector(const BasicBlock &BB) const { + return computeEmbeddings(BB); + } /// Computes and returns the embedding for the current function. - LLVM_ABI const Embedding &getFunctionVector() const; + LLVM_ABI Embedding getFunctionVector() const { return computeEmbeddings(); } + + /// Invalidate embeddings if cached. The embeddings may not be relevant + /// anymore when the IR changes due to transformations. In such cases, the + /// cached embeddings should be invalidated to ensure + /// correctness/recomputation. This is a no-op for SymbolicEmbedder but + /// removes all the cached entries in FlowAwareEmbedder. + virtual void invalidateEmbeddings() { return; } }; /// Class for computing the Symbolic embeddings of IR2Vec. @@ -580,7 +583,7 @@ public: /// representations obtained from the Vocabulary. class LLVM_ABI SymbolicEmbedder : public Embedder { private: - void computeEmbeddings(const BasicBlock &BB) const override; + Embedding computeEmbeddings(const Instruction &I) const override; public: SymbolicEmbedder(const Function &F, const Vocabulary &Vocab) @@ -592,11 +595,15 @@ public: /// embeddings, and additionally capture the flow information in the IR. class LLVM_ABI FlowAwareEmbedder : public Embedder { private: - void computeEmbeddings(const BasicBlock &BB) const override; + // FlowAware embeddings would benefit from caching instruction embeddings as + // they are reused while computing the embeddings of other instructions. + mutable InstEmbeddingsMap InstVecMap; + Embedding computeEmbeddings(const Instruction &I) const override; public: FlowAwareEmbedder(const Function &F, const Vocabulary &Vocab) : Embedder(F, Vocab) {} + void invalidateEmbeddings() override { InstVecMap.clear(); } }; } // namespace ir2vec diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h index 930c6f9..4a54caa 100644 --- a/llvm/include/llvm/IR/ConstantFPRange.h +++ b/llvm/include/llvm/IR/ConstantFPRange.h @@ -200,6 +200,12 @@ public: /// with another range. The resultant range is guaranteed to include the /// elements of both sets, but may contain more. LLVM_ABI ConstantFPRange unionWith(const ConstantFPRange &CR) const; + + /// Calculate absolute value range. + LLVM_ABI ConstantFPRange abs() const; + + /// Calculate range of negated values. + LLVM_ABI ConstantFPRange negate() const; }; inline raw_ostream &operator<<(raw_ostream &OS, const ConstantFPRange &CR) { diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 5f7225e..a426fb0 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -20,6 +20,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -555,6 +556,7 @@ public: Argument(StringRef Key, bool B) : Key(Key), Val(B ? "true" : "false") {} LLVM_ABI Argument(StringRef Key, DebugLoc dl); LLVM_ABI Argument(StringRef Key, InstructionCost C); + LLVM_ABI Argument(StringRef Key, BranchProbability P); }; /// \p PassName is the name of the pass emitting this diagnostic. \p diff --git a/llvm/include/llvm/Support/Mustache.h b/llvm/include/llvm/Support/Mustache.h index ee9f406..83047f2 100644 --- a/llvm/include/llvm/Support/Mustache.h +++ b/llvm/include/llvm/Support/Mustache.h @@ -71,6 +71,8 @@ #include "Error.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/JSON.h" @@ -84,10 +86,15 @@ using Lambda = std::function<llvm::json::Value()>; using SectionLambda = std::function<llvm::json::Value(std::string)>; class ASTNode; -using AstPtr = std::unique_ptr<ASTNode>; +using AstPtr = ASTNode *; using EscapeMap = DenseMap<char, std::string>; +using ASTNodeList = iplist<ASTNode>; struct MustacheContext { + MustacheContext(BumpPtrAllocator &Allocator, StringSaver &Saver) + : Allocator(Allocator), Saver(Saver) {} + BumpPtrAllocator &Allocator; + StringSaver &Saver; StringMap<AstPtr> Partials; StringMap<Lambda> Lambdas; StringMap<SectionLambda> SectionLambdas; @@ -98,7 +105,7 @@ struct MustacheContext { // and Lambdas that are registered with it. class Template { public: - LLVM_ABI Template(StringRef TemplateStr); + LLVM_ABI Template(StringRef TemplateStr, MustacheContext &Ctx); Template(const Template &) = delete; @@ -110,7 +117,7 @@ public: // type. LLVM_ABI ~Template(); - LLVM_ABI Template &operator=(Template &&Other) noexcept; + Template &operator=(Template &&) = delete; LLVM_ABI void render(const llvm::json::Value &Data, llvm::raw_ostream &OS); @@ -126,7 +133,7 @@ public: LLVM_ABI void overrideEscapeCharacters(DenseMap<char, std::string> Escapes); private: - MustacheContext Ctx; + MustacheContext &Ctx; AstPtr Tree; }; } // namespace llvm::mustache diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 5e57dca..774063b 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -116,7 +116,7 @@ def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0> ]>; def SDTIntShiftOp : SDTypeProfile<1, 2, [ // shl, sra, srl - SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2> + SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>, SDTCisSameNumEltsAs<0, 2> ]>; def SDTIntShiftPairOp : SDTypeProfile<2, 3, [ // shl_parts, sra_parts, srl_parts SDTCisInt<0>, SDTCisSameAs<1, 0>, @@ -205,6 +205,10 @@ def SDTSetCC : SDTypeProfile<1, 3, [ // setcc SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> ]>; +def SDTFSetCC : SDTypeProfile<1, 3, [ // strict_fsetcc, strict_fsetccs + SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> +]>; + def SDTSelect : SDTypeProfile<1, 3, [ // select SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3> ]>; @@ -699,8 +703,8 @@ def strict_bf16_to_fp : SDNode<"ISD::STRICT_BF16_TO_FP", def strict_fp_to_bf16 : SDNode<"ISD::STRICT_FP_TO_BF16", SDTFPToIntOp, [SDNPHasChain]>; -def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTSetCC, [SDNPHasChain]>; -def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTSetCC, [SDNPHasChain]>; +def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTFSetCC, [SDNPHasChain]>; +def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTFSetCC, [SDNPHasChain]>; def get_fpenv : SDNode<"ISD::GET_FPENV", SDTGetFPStateOp, [SDNPHasChain]>; def set_fpenv : SDNode<"ISD::SET_FPENV", SDTSetFPStateOp, [SDNPHasChain]>; diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp index 1794a60..85b5372 100644 --- a/llvm/lib/Analysis/IR2Vec.cpp +++ b/llvm/lib/Analysis/IR2Vec.cpp @@ -153,11 +153,6 @@ void Embedding::print(raw_ostream &OS) const { // Embedder and its subclasses //===----------------------------------------------------------------------===// -Embedder::Embedder(const Function &F, const Vocabulary &Vocab) - : F(F), Vocab(Vocab), Dimension(Vocab.getDimension()), - OpcWeight(::OpcWeight), TypeWeight(::TypeWeight), ArgWeight(::ArgWeight), - FuncVector(Embedding(Dimension)) {} - std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F, const Vocabulary &Vocab) { switch (Mode) { @@ -169,110 +164,85 @@ std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F, return nullptr; } -const InstEmbeddingsMap &Embedder::getInstVecMap() const { - if (InstVecMap.empty()) - computeEmbeddings(); - return InstVecMap; -} - -const BBEmbeddingsMap &Embedder::getBBVecMap() const { - if (BBVecMap.empty()) - computeEmbeddings(); - return BBVecMap; -} - -const Embedding &Embedder::getBBVector(const BasicBlock &BB) const { - auto It = BBVecMap.find(&BB); - if (It != BBVecMap.end()) - return It->second; - computeEmbeddings(BB); - return BBVecMap[&BB]; -} +Embedding Embedder::computeEmbeddings() const { + Embedding FuncVector(Dimension, 0.0); -const Embedding &Embedder::getFunctionVector() const { - // Currently, we always (re)compute the embeddings for the function. - // This is cheaper than caching the vector. - computeEmbeddings(); - return FuncVector; -} - -void Embedder::computeEmbeddings() const { if (F.isDeclaration()) - return; - - FuncVector = Embedding(Dimension, 0.0); + return FuncVector; // Consider only the basic blocks that are reachable from entry - for (const BasicBlock *BB : depth_first(&F)) { - computeEmbeddings(*BB); - FuncVector += BBVecMap[BB]; - } + for (const BasicBlock *BB : depth_first(&F)) + FuncVector += computeEmbeddings(*BB); + return FuncVector; } -void SymbolicEmbedder::computeEmbeddings(const BasicBlock &BB) const { +Embedding Embedder::computeEmbeddings(const BasicBlock &BB) const { Embedding BBVector(Dimension, 0); // We consider only the non-debug and non-pseudo instructions - for (const auto &I : BB.instructionsWithoutDebug()) { - Embedding ArgEmb(Dimension, 0); - for (const auto &Op : I.operands()) - ArgEmb += Vocab[*Op]; - auto InstVector = - Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb; - if (const auto *IC = dyn_cast<CmpInst>(&I)) - InstVector += Vocab[IC->getPredicate()]; - InstVecMap[&I] = InstVector; - BBVector += InstVector; - } - BBVecMap[&BB] = BBVector; -} - -void FlowAwareEmbedder::computeEmbeddings(const BasicBlock &BB) const { - Embedding BBVector(Dimension, 0); + for (const auto &I : BB.instructionsWithoutDebug()) + BBVector += computeEmbeddings(I); + return BBVector; +} + +Embedding SymbolicEmbedder::computeEmbeddings(const Instruction &I) const { + // Currently, we always (re)compute the embeddings for symbolic embedder. + // This is cheaper than caching the vectors. + Embedding ArgEmb(Dimension, 0); + for (const auto &Op : I.operands()) + ArgEmb += Vocab[*Op]; + auto InstVector = + Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb; + if (const auto *IC = dyn_cast<CmpInst>(&I)) + InstVector += Vocab[IC->getPredicate()]; + return InstVector; +} + +Embedding FlowAwareEmbedder::computeEmbeddings(const Instruction &I) const { + // If we have already computed the embedding for this instruction, return it + auto It = InstVecMap.find(&I); + if (It != InstVecMap.end()) + return It->second; - // We consider only the non-debug and non-pseudo instructions - for (const auto &I : BB.instructionsWithoutDebug()) { - // TODO: Handle call instructions differently. - // For now, we treat them like other instructions - Embedding ArgEmb(Dimension, 0); - for (const auto &Op : I.operands()) { - // If the operand is defined elsewhere, we use its embedding - if (const auto *DefInst = dyn_cast<Instruction>(Op)) { - auto DefIt = InstVecMap.find(DefInst); - // Fixme (#159171): Ideally we should never miss an instruction - // embedding here. - // But when we have cyclic dependencies (e.g., phi - // nodes), we might miss the embedding. In such cases, we fall back to - // using the vocabulary embedding. This can be fixed by iterating to a - // fixed-point, or by using a simple solver for the set of simultaneous - // equations. - // Another case when we might miss an instruction embedding is when - // the operand instruction is in a different basic block that has not - // been processed yet. This can be fixed by processing the basic blocks - // in a topological order. - if (DefIt != InstVecMap.end()) - ArgEmb += DefIt->second; - else - ArgEmb += Vocab[*Op]; - } - // If the operand is not defined by an instruction, we use the vocabulary - else { - LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: " - << *Op << "=" << Vocab[*Op][0] << "\n"); + // TODO: Handle call instructions differently. + // For now, we treat them like other instructions + Embedding ArgEmb(Dimension, 0); + for (const auto &Op : I.operands()) { + // If the operand is defined elsewhere, we use its embedding + if (const auto *DefInst = dyn_cast<Instruction>(Op)) { + auto DefIt = InstVecMap.find(DefInst); + // Fixme (#159171): Ideally we should never miss an instruction + // embedding here. + // But when we have cyclic dependencies (e.g., phi + // nodes), we might miss the embedding. In such cases, we fall back to + // using the vocabulary embedding. This can be fixed by iterating to a + // fixed-point, or by using a simple solver for the set of simultaneous + // equations. + // Another case when we might miss an instruction embedding is when + // the operand instruction is in a different basic block that has not + // been processed yet. This can be fixed by processing the basic blocks + // in a topological order. + if (DefIt != InstVecMap.end()) + ArgEmb += DefIt->second; + else ArgEmb += Vocab[*Op]; - } } - // Create the instruction vector by combining opcode, type, and arguments - // embeddings - auto InstVector = - Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb; - // Add compare predicate embedding as an additional operand if applicable - if (const auto *IC = dyn_cast<CmpInst>(&I)) - InstVector += Vocab[IC->getPredicate()]; - InstVecMap[&I] = InstVector; - BBVector += InstVector; + // If the operand is not defined by an instruction, we use the + // vocabulary + else { + LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: " + << *Op << "=" << Vocab[*Op][0] << "\n"); + ArgEmb += Vocab[*Op]; + } } - BBVecMap[&BB] = BBVector; + // Create the instruction vector by combining opcode, type, and arguments + // embeddings + auto InstVector = + Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb; + if (const auto *IC = dyn_cast<CmpInst>(&I)) + InstVector += Vocab[IC->getPredicate()]; + InstVecMap[&I] = InstVector; + return InstVector; } // ==----------------------------------------------------------------------===// @@ -695,25 +665,17 @@ PreservedAnalyses IR2VecPrinterPass::run(Module &M, Emb->getFunctionVector().print(OS); OS << "Basic block vectors:\n"; - const auto &BBMap = Emb->getBBVecMap(); for (const BasicBlock &BB : F) { - auto It = BBMap.find(&BB); - if (It != BBMap.end()) { - OS << "Basic block: " << BB.getName() << ":\n"; - It->second.print(OS); - } + OS << "Basic block: " << BB.getName() << ":\n"; + Emb->getBBVector(BB).print(OS); } OS << "Instruction vectors:\n"; - const auto &InstMap = Emb->getInstVecMap(); for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { - auto It = InstMap.find(&I); - if (It != InstMap.end()) { - OS << "Instruction: "; - I.print(OS); - It->second.print(OS); - } + OS << "Instruction: "; + I.print(OS); + Emb->getInstVector(I).print(OS); } } } diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index ebfea8e..e17a214 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -2051,6 +2051,12 @@ bool RegisterCoalescer::joinCopy( } if (CP.getNewRC()) { + if (RegClassInfo.getNumAllocatableRegs(CP.getNewRC()) == 0) { + LLVM_DEBUG(dbgs() << "\tNo " << TRI->getRegClassName(CP.getNewRC()) + << "are available for allocation\n"); + return false; + } + auto SrcRC = MRI->getRegClass(CP.getSrcReg()); auto DstRC = MRI->getRegClass(CP.getDstReg()); unsigned SrcIdx = CP.getSrcIdx(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c5c3866..5ffdc4e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19340,8 +19340,10 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) { EVT VT = N->getValueType(0); const SDNodeFlags Flags = N->getFlags(); unsigned Opc = N->getOpcode(); - bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; - bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM; + bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; + bool PropOnlySNaNsToQNaNs = Opc == ISD::FMINNUM || Opc == ISD::FMAXNUM; + bool IsMin = + Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM; SelectionDAG::FlagInserter FlagsInserter(DAG, N); // Constant fold. @@ -19356,34 +19358,53 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) { if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) { const APFloat &AF = N1CFP->getValueAPF(); - // minnum(X, nan) -> X - // maxnum(X, nan) -> X - // minimum(X, nan) -> nan - // maximum(X, nan) -> nan - if (AF.isNaN()) - return PropagatesNaN ? N->getOperand(1) : N->getOperand(0); + // minnum(X, qnan) -> X + // maxnum(X, qnan) -> X + // minnum(X, snan) -> qnan + // maxnum(X, snan) -> qnan + // minimum(X, nan) -> qnan + // maximum(X, nan) -> qnan + // minimumnum(X, nan) -> X + // maximumnum(X, nan) -> X + if (AF.isNaN()) { + if (PropAllNaNsToQNaNs || (AF.isSignaling() && PropOnlySNaNsToQNaNs)) { + if (AF.isSignaling()) + return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT); + return N->getOperand(1); + } + return N->getOperand(0); + } // In the following folds, inf can be replaced with the largest finite // float, if the ninf flag is set. if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) { - // minnum(X, -inf) -> -inf - // maxnum(X, +inf) -> +inf + // minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation) + // maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation) // minimum(X, -inf) -> -inf if nnan // maximum(X, +inf) -> +inf if nnan - if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs())) + // minimumnum(X, -inf) -> -inf + // maximumnum(X, +inf) -> +inf + if (IsMin == AF.isNegative() && + (!PropAllNaNsToQNaNs || Flags.hasNoNaNs())) return N->getOperand(1); // minnum(X, +inf) -> X if nnan // maxnum(X, -inf) -> X if nnan - // minimum(X, +inf) -> X - // maximum(X, -inf) -> X - if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs())) + // minimum(X, +inf) -> X (ignoring quieting of sNaNs) + // maximum(X, -inf) -> X (ignoring quieting of sNaNs) + // minimumnum(X, +inf) -> X if nnan + // maximumnum(X, -inf) -> X if nnan + if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs())) return N->getOperand(0); } } + // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM + if (Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM) + return SDValue(); + if (SDValue SD = reassociateReduction( - PropagatesNaN + PropAllNaNsToQNaNs ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM) : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX), Opc, SDLoc(N), VT, N0, N1, Flags)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 175753f..6c11c5b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -234,6 +234,19 @@ static bool dontUseFastISelFor(const Function &Fn) { }); } +static bool maintainPGOProfile(const TargetMachine &TM, + CodeGenOptLevel OptLevel) { + if (OptLevel != CodeGenOptLevel::None) + return true; + if (TM.getPGOOption()) { + const PGOOptions &Options = *TM.getPGOOption(); + return Options.Action == PGOOptions::PGOAction::IRUse || + Options.Action == PGOOptions::PGOAction::SampleUse || + Options.CSAction == PGOOptions::CSPGOAction::CSIRUse; + } + return false; +} + namespace llvm { //===--------------------------------------------------------------------===// @@ -395,6 +408,7 @@ SelectionDAGISel::~SelectionDAGISel() { delete CurDAG; } void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const { CodeGenOptLevel OptLevel = Selector->OptLevel; + bool RegisterPGOPasses = maintainPGOProfile(Selector->TM, Selector->OptLevel); if (OptLevel != CodeGenOptLevel::None) AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); @@ -403,15 +417,15 @@ void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); - if (UseMBPI && OptLevel != CodeGenOptLevel::None) - AU.addRequired<BranchProbabilityInfoWrapperPass>(); + if (UseMBPI && RegisterPGOPasses) + AU.addRequired<BranchProbabilityInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); // AssignmentTrackingAnalysis only runs if assignment tracking is enabled for // the module. AU.addRequired<AssignmentTrackingAnalysis>(); AU.addPreserved<AssignmentTrackingAnalysis>(); - if (OptLevel != CodeGenOptLevel::None) - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); + if (RegisterPGOPasses) + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -464,6 +478,7 @@ void SelectionDAGISel::initializeAnalysisResults( (void)MatchFilterFuncName; #endif + bool RegisterPGOPasses = maintainPGOProfile(TM, OptLevel); TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); @@ -474,7 +489,7 @@ void SelectionDAGISel::initializeAnalysisResults( auto *PSI = MAMP.getCachedResult<ProfileSummaryAnalysis>(*Fn.getParent()); BlockFrequencyInfo *BFI = nullptr; FAM.getResult<BlockFrequencyAnalysis>(Fn); - if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None) + if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses) BFI = &FAM.getResult<BlockFrequencyAnalysis>(Fn); FunctionVarLocs const *FnVarLocs = nullptr; @@ -492,7 +507,7 @@ void SelectionDAGISel::initializeAnalysisResults( // into account). That's unfortunate but OK because it just means we won't // ask for passes that have been required anyway. - if (UseMBPI && OptLevel != CodeGenOptLevel::None) + if (UseMBPI && RegisterPGOPasses) FuncInfo->BPI = &FAM.getResult<BranchProbabilityAnalysis>(Fn); else FuncInfo->BPI = nullptr; @@ -518,6 +533,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { (void)MatchFilterFuncName; #endif + bool RegisterPGOPasses = maintainPGOProfile(TM, OptLevel); TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); @@ -528,7 +544,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { AC = &MFP.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(Fn); auto *PSI = &MFP.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); BlockFrequencyInfo *BFI = nullptr; - if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None) + if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses) BFI = &MFP.getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); FunctionVarLocs const *FnVarLocs = nullptr; @@ -549,7 +565,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { // into account). That's unfortunate but OK because it just means we won't // ask for passes that have been required anyway. - if (UseMBPI && OptLevel != CodeGenOptLevel::None) + if (UseMBPI && RegisterPGOPasses) FuncInfo->BPI = &MFP.getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); else diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index 7509188..fba6942 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -391,3 +391,23 @@ ConstantFPRange ConstantFPRange::unionWith(const ConstantFPRange &CR) const { return ConstantFPRange(minnum(Lower, CR.Lower), maxnum(Upper, CR.Upper), MayBeQNaN | CR.MayBeQNaN, MayBeSNaN | CR.MayBeSNaN); } + +ConstantFPRange ConstantFPRange::abs() const { + if (isNaNOnly()) + return *this; + // Check if the range is all non-negative or all non-positive. + if (Lower.isNegative() == Upper.isNegative()) { + if (Lower.isNegative()) + return negate(); + return *this; + } + // The range contains both positive and negative values. + APFloat NewLower = APFloat::getZero(getSemantics()); + APFloat NewUpper = maxnum(-Lower, Upper); + return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN, + MayBeSNaN); +} + +ConstantFPRange ConstantFPRange::negate() const { + return ConstantFPRange(-Upper, -Lower, MayBeQNaN, MayBeSNaN); +} diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp index 4f37624..8e6d654 100644 --- a/llvm/lib/IR/DiagnosticInfo.cpp +++ b/llvm/lib/IR/DiagnosticInfo.cpp @@ -273,6 +273,13 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, C.print(OS); } +DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, + BranchProbability P) + : Key(std::string(Key)) { + raw_string_ostream OS(Val); + P.print(OS); +} + DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, DebugLoc Loc) : Key(std::string(Key)), Loc(Loc) { if (Loc) { diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp index 47860c0..708e79d 100644 --- a/llvm/lib/Support/Mustache.cpp +++ b/llvm/lib/Support/Mustache.cpp @@ -20,7 +20,7 @@ using namespace llvm::mustache; namespace { -using Accessor = SmallVector<std::string>; +using Accessor = ArrayRef<StringRef>; static bool isFalsey(const json::Value &V) { return V.getAsNull() || (V.getAsBoolean() && !V.getAsBoolean().value()) || @@ -34,23 +34,32 @@ static bool isContextFalsey(const json::Value *V) { return isFalsey(*V); } -static Accessor splitMustacheString(StringRef Str) { +static Accessor splitMustacheString(StringRef Str, MustacheContext &Ctx) { // We split the mustache string into an accessor. // For example: // "a.b.c" would be split into {"a", "b", "c"} // We make an exception for a single dot which // refers to the current context. - Accessor Tokens; + SmallVector<StringRef> Tokens; if (Str == ".") { - Tokens.emplace_back(Str); - return Tokens; - } - while (!Str.empty()) { - StringRef Part; - std::tie(Part, Str) = Str.split("."); - Tokens.emplace_back(Part.trim()); + // "." is a special accessor that refers to the current context. + // It's a literal, so it doesn't need to be saved. + Tokens.push_back("."); + } else { + while (!Str.empty()) { + StringRef Part; + std::tie(Part, Str) = Str.split('.'); + // Each part of the accessor needs to be saved to the arena + // to ensure it has a stable address. + Tokens.push_back(Ctx.Saver.save(Part.trim())); + } } - return Tokens; + // Now, allocate memory for the array of StringRefs in the arena. + StringRef *ArenaTokens = Ctx.Allocator.Allocate<StringRef>(Tokens.size()); + // Copy the StringRefs from the stack vector to the arena. + std::copy(Tokens.begin(), Tokens.end(), ArenaTokens); + // Return an ArrayRef pointing to the stable arena memory. + return ArrayRef<StringRef>(ArenaTokens, Tokens.size()); } } // namespace @@ -97,23 +106,23 @@ public: SetDelimiter, }; - Token(std::string Str) - : TokenType(Type::Text), RawBody(std::move(Str)), TokenBody(RawBody), + Token(StringRef Str) + : TokenType(Type::Text), RawBody(Str), TokenBody(RawBody), AccessorValue({}), Indentation(0) {}; - Token(std::string RawBody, std::string TokenBody, char Identifier) - : RawBody(std::move(RawBody)), TokenBody(std::move(TokenBody)), - Indentation(0) { + Token(StringRef RawBody, StringRef TokenBody, char Identifier, + MustacheContext &Ctx) + : RawBody(RawBody), TokenBody(TokenBody), Indentation(0) { TokenType = getTokenType(Identifier); if (TokenType == Type::Comment) return; StringRef AccessorStr(this->TokenBody); if (TokenType != Type::Variable) AccessorStr = AccessorStr.substr(1); - AccessorValue = splitMustacheString(StringRef(AccessorStr).trim()); + AccessorValue = splitMustacheString(StringRef(AccessorStr).trim(), Ctx); } - Accessor getAccessor() const { return AccessorValue; } + ArrayRef<StringRef> getAccessor() const { return AccessorValue; } Type getType() const { return TokenType; } @@ -144,16 +153,16 @@ public: Type TokenType; // RawBody is the original string that was tokenized. - std::string RawBody; + StringRef RawBody; // TokenBody is the original string with the identifier removed. - std::string TokenBody; - Accessor AccessorValue; + StringRef TokenBody; + ArrayRef<StringRef> AccessorValue; size_t Indentation; }; using EscapeMap = DenseMap<char, std::string>; -class ASTNode { +class ASTNode : public ilist_node<ASTNode> { public: enum Type { Root, @@ -168,18 +177,19 @@ public: ASTNode(MustacheContext &Ctx) : Ctx(Ctx), Ty(Type::Root), Parent(nullptr), ParentContext(nullptr) {} - ASTNode(MustacheContext &Ctx, std::string Body, ASTNode *Parent) - : Ctx(Ctx), Ty(Type::Text), Body(std::move(Body)), Parent(Parent), + ASTNode(MustacheContext &Ctx, StringRef Body, ASTNode *Parent) + : Ctx(Ctx), Ty(Type::Text), Body(Body), Parent(Parent), ParentContext(nullptr) {} // Constructor for Section/InvertSection/Variable/UnescapeVariable Nodes - ASTNode(MustacheContext &Ctx, Type Ty, Accessor Accessor, ASTNode *Parent) - : Ctx(Ctx), Ty(Ty), Parent(Parent), AccessorValue(std::move(Accessor)), + ASTNode(MustacheContext &Ctx, Type Ty, ArrayRef<StringRef> Accessor, + ASTNode *Parent) + : Ctx(Ctx), Ty(Ty), Parent(Parent), AccessorValue(Accessor), ParentContext(nullptr) {} - void addChild(AstPtr Child) { Children.emplace_back(std::move(Child)); }; + void addChild(AstPtr Child) { Children.push_back(Child); }; - void setRawBody(std::string NewBody) { RawBody = std::move(NewBody); }; + void setRawBody(StringRef NewBody) { RawBody = NewBody; }; void setIndentation(size_t NewIndentation) { Indentation = NewIndentation; }; @@ -212,28 +222,27 @@ private: MustacheContext &Ctx; Type Ty; size_t Indentation = 0; - std::string RawBody; - std::string Body; + StringRef RawBody; + StringRef Body; ASTNode *Parent; - // TODO: switch implementation to SmallVector<T> - std::vector<AstPtr> Children; - const Accessor AccessorValue; + ASTNodeList Children; + const ArrayRef<StringRef> AccessorValue; const llvm::json::Value *ParentContext; }; // A wrapper for arena allocator for ASTNodes static AstPtr createRootNode(MustacheContext &Ctx) { - return std::make_unique<ASTNode>(Ctx); + return new (Ctx.Allocator.Allocate<ASTNode>()) ASTNode(Ctx); } -static AstPtr createNode(MustacheContext &Ctx, ASTNode::Type T, Accessor A, - ASTNode *Parent) { - return std::make_unique<ASTNode>(Ctx, T, std::move(A), Parent); +static AstPtr createNode(MustacheContext &Ctx, ASTNode::Type T, + ArrayRef<StringRef> A, ASTNode *Parent) { + return new (Ctx.Allocator.Allocate<ASTNode>()) ASTNode(Ctx, T, A, Parent); } -static AstPtr createTextNode(MustacheContext &Ctx, std::string Body, +static AstPtr createTextNode(MustacheContext &Ctx, StringRef Body, ASTNode *Parent) { - return std::make_unique<ASTNode>(Ctx, std::move(Body), Parent); + return new (Ctx.Allocator.Allocate<ASTNode>()) ASTNode(Ctx, Body, Parent); } // Function to check if there is meaningful text behind. @@ -295,9 +304,9 @@ static void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) { StringRef NextTokenBody = NextToken.TokenBody; // Cut off the leading newline which could be \n or \r\n. if (NextTokenBody.starts_with("\r\n")) - NextToken.TokenBody = NextTokenBody.substr(2).str(); + NextToken.TokenBody = NextTokenBody.substr(2); else if (NextTokenBody.starts_with("\n")) - NextToken.TokenBody = NextTokenBody.substr(1).str(); + NextToken.TokenBody = NextTokenBody.substr(1); } // Adjust previous token body if there no text behind. @@ -312,7 +321,7 @@ void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx, StringRef PrevTokenBody = PrevToken.TokenBody; StringRef Unindented = PrevTokenBody.rtrim(" \r\t\v"); size_t Indentation = PrevTokenBody.size() - Unindented.size(); - PrevToken.TokenBody = Unindented.str(); + PrevToken.TokenBody = Unindented; CurrentToken.setIndentation(Indentation); } @@ -402,21 +411,20 @@ static Tag findNextTag(StringRef Template, size_t StartPos, StringRef Open, } static std::optional<std::pair<StringRef, StringRef>> -processTag(const Tag &T, SmallVectorImpl<Token> &Tokens) { +processTag(const Tag &T, SmallVectorImpl<Token> &Tokens, MustacheContext &Ctx) { LLVM_DEBUG(dbgs() << "[Tag] " << T.FullMatch << ", Content: " << T.Content << ", Kind: " << tagKindToString(T.TagKind) << "\n"); if (T.TagKind == Tag::Kind::Triple) { - Tokens.emplace_back(T.FullMatch.str(), "&" + T.Content.str(), '&'); + Tokens.emplace_back(T.FullMatch, Ctx.Saver.save("&" + T.Content), '&', Ctx); return std::nullopt; } StringRef Interpolated = T.Content; - std::string RawBody = T.FullMatch.str(); if (!Interpolated.trim().starts_with("=")) { char Front = Interpolated.empty() ? ' ' : Interpolated.trim().front(); - Tokens.emplace_back(RawBody, Interpolated.str(), Front); + Tokens.emplace_back(T.FullMatch, Interpolated, Front, Ctx); return std::nullopt; } - Tokens.emplace_back(RawBody, Interpolated.str(), '='); + Tokens.emplace_back(T.FullMatch, Interpolated, '=', Ctx); StringRef DelimSpec = Interpolated.trim(); DelimSpec = DelimSpec.drop_front(1); DelimSpec = DelimSpec.take_until([](char C) { return C == '='; }); @@ -432,7 +440,7 @@ processTag(const Tag &T, SmallVectorImpl<Token> &Tokens) { // The mustache spec allows {{{ }}} to unescape variables, // but we don't support that here. An unescape variable // is represented only by {{& variable}}. -static SmallVector<Token> tokenize(StringRef Template) { +static SmallVector<Token> tokenize(StringRef Template, MustacheContext &Ctx) { LLVM_DEBUG(dbgs() << "[Tokenize Template] \"" << Template << "\"\n"); SmallVector<Token> Tokens; SmallString<8> Open("{{"); @@ -446,19 +454,17 @@ static SmallVector<Token> tokenize(StringRef Template) { if (T.TagKind == Tag::Kind::None) { // No more tags, the rest is text. - Tokens.emplace_back(Template.substr(Start).str()); - LLVM_DEBUG(dbgs() << " No more tags. Created final Text token: \"" - << Template.substr(Start) << "\"\n"); + Tokens.emplace_back(Template.substr(Start)); break; } // Add the text before the tag. if (T.StartPosition > Start) { StringRef Text = Template.substr(Start, T.StartPosition - Start); - Tokens.emplace_back(Text.str()); + Tokens.emplace_back(Text); } - if (auto NewDelims = processTag(T, Tokens)) { + if (auto NewDelims = processTag(T, Tokens, Ctx)) { std::tie(Open, Close) = *NewDelims; } @@ -614,20 +620,20 @@ void Parser::parseSection(ASTNode *Parent, ASTNode::Type Ty, const Accessor &A) { AstPtr CurrentNode = createNode(Ctx, Ty, A, Parent); size_t Start = CurrentPtr; - parseMustache(CurrentNode.get()); + parseMustache(CurrentNode); const size_t End = CurrentPtr - 1; - std::string RawBody; + SmallString<128> RawBody; for (std::size_t I = Start; I < End; I++) RawBody += Tokens[I].RawBody; - CurrentNode->setRawBody(std::move(RawBody)); - Parent->addChild(std::move(CurrentNode)); + CurrentNode->setRawBody(Ctx.Saver.save(StringRef(RawBody))); + Parent->addChild(CurrentNode); } AstPtr Parser::parse() { - Tokens = tokenize(TemplateStr); + Tokens = tokenize(TemplateStr, Ctx); CurrentPtr = 0; AstPtr RootNode = createRootNode(Ctx); - parseMustache(RootNode.get()); + parseMustache(RootNode); return RootNode; } @@ -636,31 +642,29 @@ void Parser::parseMustache(ASTNode *Parent) { while (CurrentPtr < Tokens.size()) { Token CurrentToken = Tokens[CurrentPtr]; CurrentPtr++; - Accessor A = CurrentToken.getAccessor(); + ArrayRef<StringRef> A = CurrentToken.getAccessor(); AstPtr CurrentNode; switch (CurrentToken.getType()) { case Token::Type::Text: { - CurrentNode = - createTextNode(Ctx, std::move(CurrentToken.TokenBody), Parent); - Parent->addChild(std::move(CurrentNode)); + CurrentNode = createTextNode(Ctx, CurrentToken.TokenBody, Parent); + Parent->addChild(CurrentNode); break; } case Token::Type::Variable: { - CurrentNode = createNode(Ctx, ASTNode::Variable, std::move(A), Parent); - Parent->addChild(std::move(CurrentNode)); + CurrentNode = createNode(Ctx, ASTNode::Variable, A, Parent); + Parent->addChild(CurrentNode); break; } case Token::Type::UnescapeVariable: { - CurrentNode = - createNode(Ctx, ASTNode::UnescapeVariable, std::move(A), Parent); - Parent->addChild(std::move(CurrentNode)); + CurrentNode = createNode(Ctx, ASTNode::UnescapeVariable, A, Parent); + Parent->addChild(CurrentNode); break; } case Token::Type::Partial: { - CurrentNode = createNode(Ctx, ASTNode::Partial, std::move(A), Parent); + CurrentNode = createNode(Ctx, ASTNode::Partial, A, Parent); CurrentNode->setIndentation(CurrentToken.getIndentation()); - Parent->addChild(std::move(CurrentNode)); + Parent->addChild(CurrentNode); break; } case Token::Type::SectionOpen: { @@ -694,8 +698,7 @@ static void toMustacheString(const json::Value &Data, raw_ostream &OS) { return; } case json::Value::String: { - auto Str = *Data.getAsString(); - OS << Str.str(); + OS << *Data.getAsString(); return; } @@ -727,7 +730,7 @@ void ASTNode::renderPartial(const json::Value &CurrentCtx, << ", Indentation:" << Indentation << "\n"); auto Partial = Ctx.Partials.find(AccessorValue[0]); if (Partial != Ctx.Partials.end()) - renderPartial(CurrentCtx, OS, Partial->getValue().get()); + renderPartial(CurrentCtx, OS, Partial->getValue()); } void ASTNode::renderVariable(const json::Value &CurrentCtx, @@ -858,8 +861,8 @@ const json::Value *ASTNode::findContext() { void ASTNode::renderChild(const json::Value &Contexts, MustacheOutputStream &OS) { - for (AstPtr &Child : Children) - Child->render(Contexts, OS); + for (ASTNode &Child : Children) + Child.render(Contexts, OS); } void ASTNode::renderPartial(const json::Value &Contexts, @@ -869,7 +872,7 @@ void ASTNode::renderPartial(const json::Value &Contexts, Partial->render(Contexts, IS); } -void ASTNode::renderLambdas(const json::Value &Contexts, +void ASTNode::renderLambdas(const llvm::json::Value &Contexts, MustacheOutputStream &OS, Lambda &L) { json::Value LambdaResult = L(); std::string LambdaStr; @@ -886,9 +889,9 @@ void ASTNode::renderLambdas(const json::Value &Contexts, LambdaNode->render(Contexts, OS); } -void ASTNode::renderSectionLambdas(const json::Value &Contexts, +void ASTNode::renderSectionLambdas(const llvm::json::Value &Contexts, MustacheOutputStream &OS, SectionLambda &L) { - json::Value Return = L(RawBody); + json::Value Return = L(RawBody.str()); if (isFalsey(Return)) return; std::string LambdaStr; @@ -899,15 +902,16 @@ void ASTNode::renderSectionLambdas(const json::Value &Contexts, LambdaNode->render(Contexts, OS); } -void Template::render(const json::Value &Data, llvm::raw_ostream &OS) { +void Template::render(const llvm::json::Value &Data, llvm::raw_ostream &OS) { RawMustacheOutputStream MOS(OS); Tree->render(Data, MOS); } void Template::registerPartial(std::string Name, std::string Partial) { - Parser P(Partial, Ctx); + StringRef SavedPartial = Ctx.Saver.save(Partial); + Parser P(SavedPartial, Ctx); AstPtr PartialTree = P.parse(); - Ctx.Partials.insert(std::make_pair(Name, std::move(PartialTree))); + Ctx.Partials.insert(std::make_pair(Name, PartialTree)); } void Template::registerLambda(std::string Name, Lambda L) { @@ -922,7 +926,7 @@ void Template::overrideEscapeCharacters(EscapeMap E) { Ctx.Escapes = std::move(E); } -Template::Template(StringRef TemplateStr) { +Template::Template(StringRef TemplateStr, MustacheContext &Ctx) : Ctx(Ctx) { Parser P(TemplateStr, Ctx); Tree = P.parse(); // The default behavior is to escape html entities. @@ -935,18 +939,12 @@ Template::Template(StringRef TemplateStr) { } Template::Template(Template &&Other) noexcept - : Ctx(std::move(Other.Ctx)), Tree(std::move(Other.Tree)) {} + : Ctx(Other.Ctx), Tree(Other.Tree) { + Other.Tree = nullptr; +} Template::~Template() = default; -Template &Template::operator=(Template &&Other) noexcept { - if (this != &Other) { - Ctx = std::move(Other.Ctx); - Tree = std::move(Other.Tree); - Other.Tree = nullptr; - } - return *this; -} } // namespace llvm::mustache #undef DEBUG_TYPE diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index dc8e7c8..31b3d18 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1458,6 +1458,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v16i8, Legal); setPartialReduceMLAAction(MLAOps, MVT::v2i32, MVT::v8i8, Legal); + setPartialReduceMLAAction(MLAOps, MVT::v2i32, MVT::v16i8, Custom); setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v16i8, Custom); if (Subtarget->hasMatMulInt8()) { @@ -30769,6 +30770,17 @@ AArch64TargetLowering::LowerPARTIAL_REDUCE_MLA(SDValue Op, ResultVT.isFixedLengthVector() && useSVEForFixedLengthVectorVT(ResultVT, /*OverrideNEON=*/true); + // We can handle this case natively by accumulating into a wider + // zero-padded vector. + if (!ConvertToScalable && ResultVT == MVT::v2i32 && OpVT == MVT::v16i8) { + SDValue ZeroVec = DAG.getConstant(0, DL, MVT::v4i32); + SDValue WideAcc = DAG.getInsertSubvector(DL, ZeroVec, Acc, 0); + SDValue Wide = + DAG.getNode(Op.getOpcode(), DL, MVT::v4i32, WideAcc, LHS, RHS); + SDValue Reduced = DAG.getNode(AArch64ISD::ADDP, DL, MVT::v4i32, Wide, Wide); + return DAG.getExtractSubvector(DL, MVT::v2i32, Reduced, 0); + } + if (ConvertToScalable) { ResultVT = getContainerForFixedLengthVector(DAG, ResultVT); OpVT = getContainerForFixedLengthVector(DAG, LHS.getValueType()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 557d87f..56807a4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -5053,16 +5053,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // // vdst, srcA, srcB, srcC const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + + bool UseAGPRForm = !Subtarget.hasGFX90AInsts() || + Info->selectAGPRFormMFMA(MinNumRegsRequired); + OpdsMapping[0] = - Info->getMinNumAGPRs() >= MinNumRegsRequired - ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) - : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + UseAGPRForm ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->getMinNumAGPRs() >= MinNumRegsRequired - ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) - : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); break; } case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4: @@ -5115,11 +5117,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8: case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8: case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: { + Register DstReg = MI.getOperand(0).getReg(); + unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); + unsigned MinNumRegsRequired = DstSize / 32; + const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + bool UseAGPRForm = Info->selectAGPRFormMFMA(MinNumRegsRequired); + // vdst, srcA, srcB, srcC, idx - OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[0] = UseAGPRForm ? getAGPROpMapping(DstReg, MRI, *TRI) + : getVGPROpMapping(DstReg, MRI, *TRI); + OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); - OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[4] = + UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); break; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index b7dbb59..2c1a13c 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -1202,6 +1202,12 @@ public: unsigned getMinNumAGPRs() const { return MinNumAGPRs; } + /// Return true if an MFMA that requires at least \p NumRegs should select to + /// the AGPR form, instead of the VGPR form. + bool selectAGPRFormMFMA(unsigned NumRegs) const { + return !MFMAVGPRForm && getMinNumAGPRs() >= NumRegs; + } + // \returns true if a function has a use of AGPRs via inline asm or // has a call which may use it. bool mayUseAGPRs(const Function &F) const; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 7cfd059..6500fce 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -964,14 +964,12 @@ class MAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false> : Pa class CanUseAGPR_MAI<ValueType vt> { code PredicateCode = [{ return !Subtarget->hasGFX90AInsts() || - (!SIMachineFunctionInfo::MFMAVGPRForm && - MF->getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >= - }] # !srl(vt.Size, 5) # ");"; + MF->getInfo<SIMachineFunctionInfo>()->selectAGPRFormMFMA( + }] # !srl(vt.Size, 5) # ");"; code GISelPredicateCode = [{ return !Subtarget->hasGFX90AInsts() || - (!SIMachineFunctionInfo::MFMAVGPRForm && - MF.getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >= + MF.getInfo<SIMachineFunctionInfo>()->selectAGPRFormMFMA( }] # !srl(vt.Size, 5) # ");"; } diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index a0acfcf..85ce944 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -699,35 +699,20 @@ def: OpR_RR_pat<C2_cmpgtp, setgt, i1, I64>; def: OpR_RR_pat<C2_cmpgtup, setugt, i1, I64>; def: OpR_RR_pat<C2_cmpgtp, RevCmp<setlt>, i1, I64>; def: OpR_RR_pat<C2_cmpgtup, RevCmp<setult>, i1, I64>; -def: OpR_RR_pat<A2_vcmpbeq, seteq, i1, V8I8>; def: OpR_RR_pat<A2_vcmpbeq, seteq, v8i1, V8I8>; -def: OpR_RR_pat<A4_vcmpbgt, RevCmp<setlt>, i1, V8I8>; def: OpR_RR_pat<A4_vcmpbgt, RevCmp<setlt>, v8i1, V8I8>; -def: OpR_RR_pat<A4_vcmpbgt, setgt, i1, V8I8>; def: OpR_RR_pat<A4_vcmpbgt, setgt, v8i1, V8I8>; -def: OpR_RR_pat<A2_vcmpbgtu, RevCmp<setult>, i1, V8I8>; def: OpR_RR_pat<A2_vcmpbgtu, RevCmp<setult>, v8i1, V8I8>; -def: OpR_RR_pat<A2_vcmpbgtu, setugt, i1, V8I8>; def: OpR_RR_pat<A2_vcmpbgtu, setugt, v8i1, V8I8>; -def: OpR_RR_pat<A2_vcmpheq, seteq, i1, V4I16>; def: OpR_RR_pat<A2_vcmpheq, seteq, v4i1, V4I16>; -def: OpR_RR_pat<A2_vcmphgt, RevCmp<setlt>, i1, V4I16>; def: OpR_RR_pat<A2_vcmphgt, RevCmp<setlt>, v4i1, V4I16>; -def: OpR_RR_pat<A2_vcmphgt, setgt, i1, V4I16>; def: OpR_RR_pat<A2_vcmphgt, setgt, v4i1, V4I16>; -def: OpR_RR_pat<A2_vcmphgtu, RevCmp<setult>, i1, V4I16>; def: OpR_RR_pat<A2_vcmphgtu, RevCmp<setult>, v4i1, V4I16>; -def: OpR_RR_pat<A2_vcmphgtu, setugt, i1, V4I16>; def: OpR_RR_pat<A2_vcmphgtu, setugt, v4i1, V4I16>; -def: OpR_RR_pat<A2_vcmpweq, seteq, i1, V2I32>; def: OpR_RR_pat<A2_vcmpweq, seteq, v2i1, V2I32>; -def: OpR_RR_pat<A2_vcmpwgt, RevCmp<setlt>, i1, V2I32>; def: OpR_RR_pat<A2_vcmpwgt, RevCmp<setlt>, v2i1, V2I32>; -def: OpR_RR_pat<A2_vcmpwgt, setgt, i1, V2I32>; def: OpR_RR_pat<A2_vcmpwgt, setgt, v2i1, V2I32>; -def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, i1, V2I32>; def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>; -def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>; def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>; def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>; @@ -1213,12 +1198,6 @@ def: OpR_RI_pat<S2_asl_i_r, Shl, i32, I32, u5_0ImmPred>; def: OpR_RI_pat<S2_asr_i_p, Sra, i64, I64, u6_0ImmPred>; def: OpR_RI_pat<S2_lsr_i_p, Srl, i64, I64, u6_0ImmPred>; def: OpR_RI_pat<S2_asl_i_p, Shl, i64, I64, u6_0ImmPred>; -def: OpR_RI_pat<S2_asr_i_vh, Sra, v4i16, V4I16, u4_0ImmPred>; -def: OpR_RI_pat<S2_lsr_i_vh, Srl, v4i16, V4I16, u4_0ImmPred>; -def: OpR_RI_pat<S2_asl_i_vh, Shl, v4i16, V4I16, u4_0ImmPred>; -def: OpR_RI_pat<S2_asr_i_vh, Sra, v2i32, V2I32, u5_0ImmPred>; -def: OpR_RI_pat<S2_lsr_i_vh, Srl, v2i32, V2I32, u5_0ImmPred>; -def: OpR_RI_pat<S2_asl_i_vh, Shl, v2i32, V2I32, u5_0ImmPred>; def: OpR_RR_pat<S2_asr_r_r, Sra, i32, I32, I32>; def: OpR_RR_pat<S2_lsr_r_r, Srl, i32, I32, I32>; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index bc047a4a..a1fb665 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -651,7 +651,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // Custom conversions to/from v2i8. setOperationAction(ISD::BITCAST, MVT::v2i8, Custom); - // Only logical ops can be done on v4i8 directly, others must be done + // Only logical ops can be done on v4i8/v2i32 directly, others must be done // elementwise. setOperationAction( {ISD::ABS, ISD::ADD, ISD::ADDC, ISD::ADDE, @@ -669,7 +669,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, ISD::UMIN, ISD::UMULO, ISD::UMUL_LOHI, ISD::UREM, ISD::USHLSAT, ISD::USUBO, ISD::USUBO_CARRY, ISD::VSELECT, ISD::USUBSAT}, - MVT::v4i8, Expand); + {MVT::v4i8, MVT::v2i32}, Expand); // Operations not directly supported by NVPTX. for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, @@ -689,7 +689,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v2i32}, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 40c05e8..333b693 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1823,6 +1823,11 @@ def TuneConditionalCompressedMoveFusion def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">; def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()">; +def TuneHasSingleElementVecFP64 + : SubtargetFeature<"single-element-vec-fp64", "HasSingleElementVectorFP64", "true", + "Certain vector FP64 operations produce a single result " + "element per cycle">; + def TuneMIPSP8700 : SubtargetFeature<"mips-p8700", "RISCVProcFamily", "MIPSP8700", "MIPS p8700 processor">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 447f05c..f2724c41 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1636,7 +1636,7 @@ def : QCISELECTCCIPat<SETNE, QC_SELECTNEI>; } let Predicates = [HasVendorXqcilsm, IsRV32] in { -def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7), +def : Pat<(qc_setwmi (i32 GPR:$rs3), GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7), (QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>; } // Predicates = [HasVendorXqcilsm, IsRV32] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index e519b72..57fbaa0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -634,56 +634,56 @@ def : PatGpr<bswap, REV8_RV64, i64>; let Predicates = [HasStdExtZbkb] in { def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF), - (zexti8 (XLenVT GPR:$rs1))), - (PACKH GPR:$rs1, GPR:$rs2)>; -def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)), - (zexti8 (XLenVT GPR:$rs1))), - (PACKH GPR:$rs1, GPR:$rs2)>; + zexti8:$rs1), + (PACKH zexti8:$rs1, GPR:$rs2)>; +def : Pat<(or (shl zexti8:$rs2, (XLenVT 8)), + zexti8:$rs1), + (PACKH zexti8:$rs1, zexti8:$rs2)>; def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)), - (zexti8 (XLenVT GPR:$rs1))), 0xFFFF), - (PACKH GPR:$rs1, GPR:$rs2)>; + zexti8:$rs1), 0xFFFF), + (PACKH zexti8:$rs1, GPR:$rs2)>; def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)), - (zexti8 (XLenVT GPR:$rs1))), - (PACKH GPR:$rs1, GPR:$rs2)>; + zexti8:$rs1), + (PACKH zexti8:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbkb] let Predicates = [HasStdExtZbkb, IsRV32] in { -def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))), - (PACK GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (or zexti16:$rs1, (shl GPR:$rs2, (i32 16)))), + (PACK zexti16:$rs1, GPR:$rs2)>; -def : Pat<(or (shl GPR:$rs2, (XLenVT 24)), - (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))), - (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>; +def : Pat<(i32 (or (shl GPR:$rs2, (XLenVT 24)), + (shl zexti8:$rs1, (XLenVT 16)))), + (SLLI (XLenVT (PACKH zexti8:$rs1, GPR:$rs2)), (XLenVT 16))>; // Match a pattern of 2 bytes being inserted into bits [31:16], with bits // bits [15:0] coming from a zero extended value. We can use pack with packh for // bits [31:16]. If bits [15:0] can also be a packh, it can be matched // separately. -def : Pat<(or (or (shl GPR:$op1rs2, (XLenVT 24)), - (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), - (zexti16 (XLenVT GPR:$rs1))), - (PACK (XLenVT GPR:$rs1), - (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; +def : Pat<(i32 (or (or (shl GPR:$op1rs2, (XLenVT 24)), + (shl zexti8:$op1rs1, (XLenVT 16))), + zexti16:$rs1)), + (PACK zexti16:$rs1, + (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>; } let Predicates = [HasStdExtZbkb, IsRV64] in { -def : Pat<(i64 (or (zexti32 (i64 GPR:$rs1)), (shl GPR:$rs2, (i64 32)))), - (PACK GPR:$rs1, GPR:$rs2)>; +def : Pat<(i64 (or zexti32:$rs1, (shl GPR:$rs2, (i64 32)))), + (PACK zexti32:$rs1, GPR:$rs2)>; -def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)), - (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))), - (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>; +def : Pat<(i64 (or (shl zexti8:$rs2, (XLenVT 24)), + (shl zexti8:$rs1, (XLenVT 16)))), + (SLLI (XLenVT (PACKH zexti8:$rs1, zexti8:$rs2)), (XLenVT 16))>; def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (XLenVT 24)), - (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))), - (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>; + (shl zexti8:$rs1, (XLenVT 16))), + (SLLI (XLenVT (PACKH zexti8:$rs1, GPR:$rs2)), (XLenVT 16))>; def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)), - (zexti16 (i64 GPR:$rs1))), - (PACKW GPR:$rs1, GPR:$rs2)>; + zexti16:$rs1), + (PACKW zexti16:$rs1, GPR:$rs2)>; def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32), - (zexti16 (i64 GPR:$rs1)))), - (PACKW GPR:$rs1, GPR:$rs2)>; + zexti16:$rs1)), + (PACKW zexti16:$rs1, GPR:$rs2)>; // Match a pattern of 2 bytes being inserted into bits [31:16], with bits // bits [15:0] coming from a zero extended value, and bits [63:32] being @@ -691,35 +691,35 @@ def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32), // also be a packh, it can be matched separately. def : Pat<(binop_allwusers<or> (or (shl GPR:$op1rs2, (XLenVT 24)), - (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), - (zexti16 (XLenVT GPR:$rs1))), - (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; + (shl zexti8:$op1rs1, (XLenVT 16))), + zexti16:$rs1), + (PACKW zexti16:$rs1, (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>; // We need to manually reassociate the patterns because of the binop_allwusers. def : Pat<(binop_allwusers<or> - (or (zexti16 (XLenVT GPR:$rs1)), - (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), + (or zexti16:$rs1, + (shl zexti8:$op1rs1, (XLenVT 16))), (shl GPR:$op1rs2, (XLenVT 24))), - (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; + (PACKW zexti16:$rs1, (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>; def : Pat<(binop_allwusers<or> - (or (zexti16 (XLenVT GPR:$rs1)), + (or zexti16:$rs1, (shl GPR:$op1rs2, (XLenVT 24))), - (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), - (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; + (shl zexti8:$op1rs1, (XLenVT 16))), + (PACKW zexti16:$rs1, (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>; def : Pat<(i64 (or (or (zexti16 (XLenVT GPR:$rs1)), - (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), + (shl zexti8:$op1rs1, (XLenVT 16))), (sext_inreg (shl GPR:$op1rs2, (XLenVT 24)), i32))), - (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; + (PACKW GPR:$rs1, (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>; // Match a pattern of 2 halfwords being inserted into bits [63:32], with bits // bits [31:0] coming from a zero extended value. We can use pack with packw for // bits [63:32]. If bits [63:31] can also be a packw, it can be matched // separately. def : Pat<(or (or (shl GPR:$op1rs2, (i64 48)), - (shl (zexti16 (i64 GPR:$op1rs1)), (i64 32))), - (zexti32 (i64 GPR:$rs1))), - (PACK (XLenVT GPR:$rs1), - (XLenVT (PACKW GPR:$op1rs1, GPR:$op1rs2)))>; + (shl zexti16:$op1rs1, (i64 32))), + zexti32:$rs1), + (PACK zexti32:$rs1, + (XLenVT (PACKW zexti16:$op1rs1, GPR:$op1rs2)))>; } // Predicates = [HasStdExtZbkb, IsRV64] let Predicates = [HasStdExtZbb, IsRV32] in diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td index 6d86aff..3658817 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td +++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td @@ -14,6 +14,10 @@ // otherwise. def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>; +// This scheduling predicate is true when subtarget feature TuneHasSingleElementVecFP64 +// is enabled. +def SingleElementVecFP64SchedPred : FeatureSchedPredicate<TuneHasSingleElementVecFP64>; + // Returns true if this is the sext.w pattern, addiw rd, rs1, 0. def isSEXT_W : TIIPredicate<"isSEXT_W", diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 17a7948..e86431f 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -338,7 +338,8 @@ def SIFIVE_X390 : RISCVProcessorModel<"sifive-x390", FeatureStdExtZvl1024b, FeatureVendorXSiFivecdiscarddlone, FeatureVendorXSiFivecflushdlone], - SiFiveIntelligenceTuneFeatures>; + !listconcat(SiFiveIntelligenceTuneFeatures, + [TuneHasSingleElementVecFP64])>; defvar SiFiveP400TuneFeatures = [TuneNoDefaultUnroll, TuneConditionalCompressedMoveFusion, diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 3e07eff..f863392a 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -317,7 +317,6 @@ multiclass SiFive7WriteResBase<int VLEN, ProcResourceKind VL, ProcResourceKind VS, ProcResourceKind VCQ, SiFive7FPLatencies fpLatencies, - bit isFP64Throttled = false, bit hasFastGather = false> { // Branching @@ -832,29 +831,56 @@ multiclass SiFive7WriteResBase<int VLEN, // 13. Vector Floating-Point Instructions foreach mx = SchedMxListF in { foreach sew = SchedSEWSet<mx, isF=1>.val in { - defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 64)), - SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c, - SiFive7GetCyclesDefault<mx>.c); - defvar Lat8 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 8); - defvar VA = !if(!and(isFP64Throttled, !eq(sew, 64)), VA1, VA1OrVA2); defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c; - let Latency = Lat8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { - defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>; - } - defvar Lat4 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 4); - let Latency = Lat4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { - defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA], mx, sew, IsWorstCase>; - // min max require merge - defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>; + if !eq(sew, 64) then { + defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c; + foreach SchedWriteName = ["WriteVFALUV", "WriteVFALUF", "WriteVFMulV", "WriteVFMulF", + "WriteVFMulAddV", "WriteVFMulAddF"] in + defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred, + // Predicated + [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)], + // Not Predicated + [VCQ, VA1OrVA2], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)], + mx, sew, IsWorstCase>; + foreach SchedWriteName = ["WriteVFRecpV", "WriteVFCvtIToFV"] in + defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred, + // Predicated + [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)], + // Not Predicated + [VCQ, VA1], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)], + mx, sew, IsWorstCase>; + foreach SchedWriteName = ["WriteVFSgnjV", "WriteVFSgnjF"] in + defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred, + // Predicated + [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)], + // Not Predicated + [VCQ, VA1OrVA2], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)], + mx, sew, IsWorstCase>; + foreach SchedWriteName = ["WriteVFMinMaxV", "WriteVFMinMaxF"] in + defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred, + // Predicated + [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)], + // Not Predicated + [VCQ, VA1], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)], + mx, sew, IsWorstCase>; + } else { + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in { + defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>; + } + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in { + defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; + // min max require merge + defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>; + } } } } @@ -892,19 +918,28 @@ multiclass SiFive7WriteResBase<int VLEN, // Widening foreach mx = SchedMxListW in { foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in { - defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)), - SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c, - SiFive7GetCyclesDefault<mx>.c); defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c; - let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in - defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>; + defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c; + if !eq(sew, 32) then { + defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c; + defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtIToFV", SingleElementVecFP64SchedPred, + // Predicated + [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)], + // Not Predicated + [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)], + mx, sew, IsWorstCase>; + } else { + let Latency = 8, + AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in + defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>; + } } } foreach mx = SchedMxListFW in { foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in { - defvar Cycles = SiFive7GetCyclesDefault<mx>.c; + defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c; - let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in { defm : LMULSEWWriteResMXSEW<"WriteVFWALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; defm : LMULSEWWriteResMXSEW<"WriteVFWALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; defm : LMULSEWWriteResMXSEW<"WriteVFWMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; @@ -912,11 +947,19 @@ multiclass SiFive7WriteResBase<int VLEN, defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>; } - defvar CvtCycles = !if(!and(isFP64Throttled, !eq(sew, 32)), - SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c, - SiFive7GetCyclesDefault<mx>.c); - let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, CvtCycles)] in - defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>; + if !eq(sew, 32) then { + defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c; + defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtFToFV", SingleElementVecFP64SchedPred, + // Predicated + [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)], + // Not Predicated + [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)], + mx, sew, IsWorstCase>; + } else { + let Latency = 8, + AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in + defm : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>; + } } defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c; @@ -933,13 +976,23 @@ multiclass SiFive7WriteResBase<int VLEN, } foreach mx = SchedMxListFW in { foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in { - defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)), - SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c, - SiFive7GetCyclesNarrowing<mx>.c); defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c; - let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { - defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>; - defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>; + defvar DefaultCycles = SiFive7GetCyclesNarrowing<mx>.c; + if !eq(sew, 32) then { + defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c; + foreach SchedWriteName = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in + defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred, + // Predicated + [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)], + // Not Predicated + [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)], + mx, sew, IsWorstCase>; + } else { + let Latency = 8, + AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in { + defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>; + defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>; + } } } } @@ -1499,7 +1552,6 @@ multiclass SiFive7ReadAdvance { /// eventually be supplied by different SchedMachineModels. multiclass SiFive7SchedResources<int vlen, bit extraVALU, SiFive7FPLatencies fpLatencies, - bit isFP64Throttled, bit hasFastGather> { defm SiFive7 : SiFive7ProcResources<extraVALU>; @@ -1527,8 +1579,7 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU, : SiFive7WriteResBase<vlen, SiFive7PipeA, SiFive7PipeB, SiFive7PipeAB, SiFive7IDiv, SiFive7FDiv, SiFive7VA1, SiFive7VA1OrVA2, SiFive7VL, SiFive7VS, - SiFive7VCQ, fpLatencies, isFP64Throttled, - hasFastGather>; + SiFive7VCQ, fpLatencies, hasFastGather>; //===----------------------------------------------------------------------===// // Bypass and advance @@ -1560,7 +1611,6 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel { bit HasExtraVALU = false; SiFive7FPLatencies FPLatencies; - bit IsFP64Throttled = false; bit HasFastGather = false; string Name = !subst("Model", "", !subst("SiFive7", "", NAME)); @@ -1587,7 +1637,6 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> { def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> { let HasExtraVALU = true; let FPLatencies = SiFive7LowFPLatencies; - let IsFP64Throttled = true; let HasFastGather = true; } @@ -1596,7 +1645,6 @@ foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in { let SchedModel = model in defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU, model.FPLatencies, - model.IsFP64Throttled, model.HasFastGather>; } diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index 01a4308..d11b446 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -128,6 +128,22 @@ multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred, IsWorstCase>; } +multiclass LMULSEWWriteResMXSEWVariant<string name, SchedPredicateBase Pred, + list<ProcResourceKind> predResources, + int predLat, list<int> predAcquireCycles, + list<int> predReleaseCycles, + list<ProcResourceKind> noPredResources, + int noPredLat, list<int> noPredAcquireCycles, + list<int> noPredReleaseCycles, + string mx, int sew, bit IsWorstCase> { + defm "" : LMULWriteResVariantImpl<name, name # "_" # mx # "_E" # sew, Pred, predResources, + predLat, predAcquireCycles, + predReleaseCycles, noPredResources, + noPredLat, noPredAcquireCycles, + noPredReleaseCycles, + IsWorstCase>; +} + // Define multiclasses to define SchedWrite, SchedRead, WriteRes, and // ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the // SchedMxList variants above. Each multiclass is responsible for defining diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 764ff998..4b3ddbd 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -592,10 +592,10 @@ def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)), (VCVTTPD2DQSZ128rmbkz VK2WM:$mask, addr:$src)>; // Patterns VCVTTPD2UDQSZ128 -def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))), - (VCVTTPD2UDQSZ128rmb addr:$src)>; def : Pat<(v4i32 (X86cvttp2uis (v2f64 VR128X:$src))), (VCVTTPD2UDQSZ128rr VR128X:$src)>; +def : Pat<(v4i32 (X86cvttp2uis (loadv2f64 addr:$src))), + (VCVTTPD2UDQSZ128rm addr:$src)>; def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTTPD2UDQSZ128rmb addr:$src)>; def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), (v4i32 VR128X:$src0), diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp index 9b9e2ba..9150b58 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp @@ -459,7 +459,7 @@ void TruncInstCombine::ReduceExpressionGraph(Type *SclTy) { Value *Op0 = I->getOperand(0); Value *LHS = getReducedOperand(I->getOperand(1), SclTy); Value *RHS = getReducedOperand(I->getOperand(2), SclTy); - Res = Builder.CreateSelect(Op0, LHS, RHS); + Res = Builder.CreateSelect(Op0, LHS, RHS, "", I); break; } case Instruction::PHI: { diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp index 9115946..f166fef 100644 --- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp @@ -24,6 +24,9 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -33,6 +36,11 @@ using namespace llvm; #define DEBUG_TYPE "coro-annotation-elide" +static cl::opt<float> CoroElideBranchRatio( + "coro-elide-branch-ratio", cl::init(0.55), cl::Hidden, + cl::desc("Minimum BranchProbability to consider a elide a coroutine.")); +extern cl::opt<unsigned> MinBlockCounterExecution; + static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) { for (Instruction &I : F->getEntryBlock()) if (!isa<AllocaInst>(&I)) @@ -145,6 +153,30 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine(); bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe); if (IsCallerPresplitCoroutine && HasAttr) { + BranchProbability MinBranchProbability( + static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution), + MinBlockCounterExecution); + + auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller); + + auto Prob = BranchProbability::getBranchProbability( + BFI.getBlockFreq(CB->getParent()).getFrequency(), + BFI.getEntryFreq().getFrequency()); + + if (Prob < MinBranchProbability) { + ORE.emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller) + << "'" << ore::NV("callee", Callee->getName()) + << "' not elided in '" + << ore::NV("caller", Caller->getName()) + << "' because of low probability: " + << ore::NV("probability", Prob) << " (threshold: " + << ore::NV("threshold", MinBranchProbability) << ")"; + }); + continue; + } + auto *CallerN = CG.lookup(*Caller); auto *CallerC = CallerN ? CG.lookupSCC(*CallerN) : nullptr; // If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller @@ -156,7 +188,7 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller) << "'" << ore::NV("callee", Callee->getName()) << "' elided in '" << ore::NV("caller", Caller->getName()) - << "'"; + << "' (probability: " << ore::NV("probability", Prob) << ")"; }); FAM.invalidate(*Caller, PreservedAnalyses::none()); diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 2583249..1a00d17 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio( "outline candidate and original function")); // Used to tune the minimum number of execution counts needed in the predecessor // block to the cold edge. ie. confidence interval. -static cl::opt<unsigned> +cl::opt<unsigned> MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid")); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 9b272c4..3ddf182 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -28,6 +28,10 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" +namespace llvm { +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} + /// This is the complement of getICmpCode, which turns an opcode and two /// operands into either a constant true or false, or a brand new ICmp /// instruction. The sign is passed in to determine which kind of predicate to @@ -1272,7 +1276,8 @@ Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) { static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd, bool IsLogical, InstCombiner::BuilderTy &Builder, - const SimplifyQuery &Q) { + const SimplifyQuery &Q, + Instruction &I) { // Match an equality compare with a non-poison constant as Cmp0. // Also, give up if the compare can be constant-folded to avoid looping. CmpPredicate Pred0; @@ -1306,9 +1311,12 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; SubstituteCmp = Builder.CreateICmp(Pred1, Y, C); } - if (IsLogical) - return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp) - : Builder.CreateLogicalOr(Cmp0, SubstituteCmp); + if (IsLogical) { + Instruction *MDFrom = + ProfcheckDisableMetadataFixes && isa<SelectInst>(I) ? nullptr : &I; + return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp, "", MDFrom) + : Builder.CreateLogicalOr(Cmp0, SubstituteCmp, "", MDFrom); + } return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0, SubstituteCmp); } @@ -3396,13 +3404,13 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, /*IsLogical*/ false, Builder)) return V; - if (Value *V = - foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q)) + if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, + Builder, Q, I)) return V; // We can convert this case to bitwise and, because both operands are used // on the LHS, and as such poison from both will propagate. - if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd, - /*IsLogical=*/false, Builder, Q)) { + if (Value *V = foldAndOrOfICmpsWithConstEq( + RHS, LHS, IsAnd, /*IsLogical=*/false, Builder, Q, I)) { // If RHS is still used, we should drop samesign flag. if (IsLogical && RHS->hasSameSign() && !RHS->use_empty()) { RHS->setSameSign(false); diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 3f7003d..f4e05a2 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -389,6 +389,22 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { return OS; } +/// Helper to get the successor corresponding to a particular case value for +/// a switch statement. +static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, + const APInt &NextState) { + BasicBlock *NextCase = nullptr; + for (auto Case : Switch->cases()) { + if (Case.getCaseValue()->getValue() == NextState) { + NextCase = Case.getCaseSuccessor(); + break; + } + } + if (!NextCase) + NextCase = Switch->getDefaultDest(); + return NextCase; +} + namespace { /// ThreadingPath is a path in the control flow of a loop that can be threaded /// by cloning necessary basic blocks and replacing conditional branches with @@ -401,6 +417,10 @@ struct ThreadingPath { ExitVal = V->getValue(); IsExitValSet = true; } + void setExitValue(const APInt &V) { + ExitVal = V; + IsExitValSet = true; + } bool isExitValueSet() const { return IsExitValSet; } /// Determinator is the basic block that determines the next state of the DFA. @@ -583,44 +603,8 @@ struct AllSwitchPaths { BasicBlock *getSwitchBlock() { return SwitchBlock; } void run() { - StateDefMap StateDef = getStateDefMap(); - if (StateDef.empty()) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", - Switch) - << "Switch instruction is not predictable."; - }); - return; - } - - auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); - auto *SwitchPhiDefBB = SwitchPhi->getParent(); - VisitedBlocks VB; - // Get paths from the determinator BBs to SwitchPhiDefBB - std::vector<ThreadingPath> PathsToPhiDef = - getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths); - if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) { - TPaths = std::move(PathsToPhiDef); - return; - } - - assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty()); - auto PathsLimit = MaxNumPaths / PathsToPhiDef.size(); - // Find and append paths from SwitchPhiDefBB to SwitchBlock. - PathsType PathsToSwitchBB = - paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit); - if (PathsToSwitchBB.empty()) - return; - - std::vector<ThreadingPath> TempList; - for (const ThreadingPath &Path : PathsToPhiDef) { - for (const PathType &PathToSw : PathsToSwitchBB) { - ThreadingPath PathCopy(Path); - PathCopy.appendExcludingFirst(PathToSw); - TempList.push_back(PathCopy); - } - } - TPaths = std::move(TempList); + findTPaths(); + unifyTPaths(); } private: @@ -812,6 +796,69 @@ private: return Res; } + // Find all threadable paths. + void findTPaths() { + StateDefMap StateDef = getStateDefMap(); + if (StateDef.empty()) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", + Switch) + << "Switch instruction is not predictable."; + }); + return; + } + + auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); + auto *SwitchPhiDefBB = SwitchPhi->getParent(); + VisitedBlocks VB; + // Get paths from the determinator BBs to SwitchPhiDefBB + std::vector<ThreadingPath> PathsToPhiDef = + getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths); + if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) { + TPaths = std::move(PathsToPhiDef); + return; + } + + assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty()); + auto PathsLimit = MaxNumPaths / PathsToPhiDef.size(); + // Find and append paths from SwitchPhiDefBB to SwitchBlock. + PathsType PathsToSwitchBB = + paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit); + if (PathsToSwitchBB.empty()) + return; + + std::vector<ThreadingPath> TempList; + for (const ThreadingPath &Path : PathsToPhiDef) { + for (const PathType &PathToSw : PathsToSwitchBB) { + ThreadingPath PathCopy(Path); + PathCopy.appendExcludingFirst(PathToSw); + TempList.push_back(PathCopy); + } + } + TPaths = std::move(TempList); + } + + // Two states are equivalent if they have the same switch destination. + // Unify the states in different threading path if the states are equivalent. + void unifyTPaths() { + llvm::SmallDenseMap<BasicBlock *, APInt> DestToState; + for (ThreadingPath &Path : TPaths) { + APInt NextState = Path.getExitValue(); + BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState); + auto StateIt = DestToState.find(Dest); + if (StateIt == DestToState.end()) { + DestToState.insert({Dest, NextState}); + continue; + } + + if (NextState != StateIt->second) { + LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to " + << StateIt->second << "\n"); + Path.setExitValue(StateIt->second); + } + } + } + unsigned NumVisited = 0; SwitchInst *Switch; BasicBlock *SwitchBlock; @@ -1335,21 +1382,6 @@ private: return It != ClonedBBs.end() ? (*It).BB : nullptr; } - /// Helper to get the successor corresponding to a particular case value for - /// a switch statement. - BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) { - BasicBlock *NextCase = nullptr; - for (auto Case : Switch->cases()) { - if (Case.getCaseValue()->getValue() == NextState) { - NextCase = Case.getCaseSuccessor(); - break; - } - } - if (!NextCase) - NextCase = Switch->getDefaultDest(); - return NextCase; - } - /// Returns true if IncomingBB is a predecessor of BB. bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { return llvm::is_contained(predecessors(BB), IncomingBB); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 155fcc5..9ac3be1 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5959,7 +5959,11 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, unsigned PreviousEdges = OtherCases->size(); if (OtherDest == SI->getDefaultDest()) ++PreviousEdges; - for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) + unsigned E = PreviousEdges - 1; + // Remove all incoming values from OtherDest if OtherDest is unreachable. + if (NewBI->isUnconditional()) + ++E; + for (unsigned I = 0; I != E; ++I) cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3f16b03..e62d57e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5696,7 +5696,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { Instruction *I = Worklist.pop_back_val(); for (auto &Op : I->operands()) if (auto *InstOp = dyn_cast<Instruction>(Op)) - if ((InstOp->getParent() == I->getParent()) && !isa<PHINode>(InstOp) && + if (TheLoop->contains(InstOp) && !isa<PHINode>(InstOp) && AddrDefs.insert(InstOp).second) Worklist.push_back(InstOp); } diff --git a/llvm/test/Analysis/IR2Vec/unreachable.ll b/llvm/test/Analysis/IR2Vec/unreachable.ll index 9be0ee1..627e2c9 100644 --- a/llvm/test/Analysis/IR2Vec/unreachable.ll +++ b/llvm/test/Analysis/IR2Vec/unreachable.ll @@ -30,13 +30,17 @@ return: ; preds = %if.else, %if.then %4 = load i32, ptr %retval, align 4 ret i32 %4 } - -; CHECK: Basic block vectors: +; We'll get individual basic block embeddings for all blocks in the function. +; But unreachable blocks are not counted for computing the function embedding. +; CHECK: Function vector: [ 1301.20 1318.20 1335.20 ] +; CHECK-NEXT: Basic block vectors: ; CHECK-NEXT: Basic block: entry: ; CHECK-NEXT: [ 816.20 825.20 834.20 ] ; CHECK-NEXT: Basic block: if.then: ; CHECK-NEXT: [ 195.00 198.00 201.00 ] ; CHECK-NEXT: Basic block: if.else: ; CHECK-NEXT: [ 195.00 198.00 201.00 ] +; CHECK-NEXT: Basic block: unreachable: +; CHECK-NEXT: [ 101.00 103.00 105.00 ] ; CHECK-NEXT: Basic block: return: ; CHECK-NEXT: [ 95.00 97.00 99.00 ] diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll index 4287507..dfff35d 100644 --- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll +++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll @@ -1451,3 +1451,52 @@ define <4 x i32> @partial_reduce_shl_zext_non_const_rhs(<16 x i8> %l, <4 x i32> %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift) ret <4 x i32> %red } + +define <2 x i32> @udot_v16i8tov2i32(<2 x i32> %acc, <16 x i8> %input) { +; CHECK-NODOT-LABEL: udot_v16i8tov2i32: +; CHECK-NODOT: // %bb.0: // %entry +; CHECK-NODOT-NEXT: ushll v2.8h, v1.8b, #0 +; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NODOT-NEXT: ushll2 v1.8h, v1.16b, #0 +; CHECK-NODOT-NEXT: ushll v3.4s, v2.4h, #0 +; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v2.4h +; CHECK-NODOT-NEXT: ushll2 v4.4s, v2.8h, #0 +; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8 +; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s +; CHECK-NODOT-NEXT: ext v3.16b, v4.16b, v4.16b, #8 +; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v2.4h +; CHECK-NODOT-NEXT: ushll v2.4s, v1.4h, #0 +; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s +; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-NODOT-NEXT: ushll2 v3.4s, v1.8h, #0 +; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h +; CHECK-NODOT-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s +; CHECK-NODOT-NEXT: ext v2.16b, v3.16b, v3.16b, #8 +; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h +; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s +; CHECK-NODOT-NEXT: ret +; +; CHECK-DOT-LABEL: udot_v16i8tov2i32: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v2.16b, #1 +; CHECK-DOT-NEXT: fmov d0, d0 +; CHECK-DOT-NEXT: udot v0.4s, v1.16b, v2.16b +; CHECK-DOT-NEXT: addp v0.4s, v0.4s, v0.4s +; CHECK-DOT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-DOT-NEXT: ret +; +; CHECK-DOT-I8MM-LABEL: udot_v16i8tov2i32: +; CHECK-DOT-I8MM: // %bb.0: // %entry +; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1 +; CHECK-DOT-I8MM-NEXT: fmov d0, d0 +; CHECK-DOT-I8MM-NEXT: udot v0.4s, v1.16b, v2.16b +; CHECK-DOT-I8MM-NEXT: addp v0.4s, v0.4s, v0.4s +; CHECK-DOT-I8MM-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-DOT-I8MM-NEXT: ret +entry: + %input.wide = zext <16 x i8> %input to <16 x i32> + %partial.reduce = tail call <2 x i32> @llvm.vector.partial.reduce.add(<2 x i32> %acc, <16 x i32> %input.wide) + ret <2 x i32> %partial.reduce +} diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.ll b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.ll new file mode 100644 index 0000000..f466513 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s + +; Make sure the coalescer doesn't introduce any uses of +; vreg_1024. None are available to allocate with the register budget +; of this function. + +define void @no_introduce_vreg_1024() #0 { +; CHECK-LABEL: no_introduce_vreg_1024: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v[0:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v9, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %tuple = call <8 x i32> asm sideeffect "; def $0","=v"() + %sub0 = extractelement <8 x i32> %tuple, i32 0 + %insert = insertelement <16 x i32> poison, i32 %sub0, i32 9 + call void asm sideeffect "; use $0","v"(<16 x i32> %insert) + ret void +} + +attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" } diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.mir b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.mir new file mode 100644 index 0000000..1f414eb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=register-coalescer -o - %s | FileCheck %s + +# The register budget for this function does not permit using 1024-bit +# registers. The coalescer should not introduce a 1024-bit virtual +# register which will fail to allocate. + +--- | + define void @no_introduce_vreg_1024() #0 { + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="10,10" } +... +--- +name: no_introduce_vreg_1024 +tracksRegLiveness: true +machineFunctionInfo: + occupancy: 10 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: no_introduce_vreg_1024 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub9:vreg_512 = COPY [[COPY]].sub0 + ; CHECK-NEXT: SI_RETURN implicit [[COPY1]] + %0:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + undef %1.sub9:vreg_512 = COPY %0.sub0 + SI_RETURN implicit %1 + +... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll index ee11b92..0c1448a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll @@ -44,23 +44,23 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) % ; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7] +; GISEL-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] ; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 ; GISEL-NEXT: s_load_dword s16, s[4:5], 0x64 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[2:3] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1] ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; GISEL-NEXT: v_mov_b32_e32 v12, s16 +; GISEL-NEXT: v_mov_b32_e32 v16, s16 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_smfmac_f32_16x16x64_f16 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2 +; GISEL-NEXT: v_smfmac_f32_16x16x64_f16 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v0, v[14:17], s[6:7] +; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[6:7] ; GISEL-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -834,24 +834,24 @@ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr(ptr addrspace(1) % ; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: global_load_dwordx4 v[14:17], v0, s[0:1] +; GISEL-NEXT: global_load_dwordx4 v[8:11], v0, s[0:1] ; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 ; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19] -; GISEL-NEXT: v_mov_b32_e32 v12, s2 +; GISEL-NEXT: v_mov_b32_e32 v16, s2 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2 +; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v0, v[14:17], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -1349,24 +1349,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_bf8__vgpr(ptr addrspace ; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: global_load_dwordx4 v[14:17], v0, s[0:1] +; GISEL-NEXT: global_load_dwordx4 v[8:11], v0, s[0:1] ; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 ; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19] -; GISEL-NEXT: v_mov_b32_e32 v12, s2 +; GISEL-NEXT: v_mov_b32_e32 v16, s2 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2 +; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v0, v[14:17], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -1513,24 +1513,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_fp8__vgpr(ptr addrspace ; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: global_load_dwordx4 v[14:17], v0, s[0:1] +; GISEL-NEXT: global_load_dwordx4 v[8:11], v0, s[0:1] ; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 ; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19] -; GISEL-NEXT: v_mov_b32_e32 v12, s2 +; GISEL-NEXT: v_mov_b32_e32 v16, s2 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2 +; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v0, v[14:17], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -1677,24 +1677,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_bf8__vgpr(ptr addrspace ; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: global_load_dwordx4 v[14:17], v0, s[0:1] +; GISEL-NEXT: global_load_dwordx4 v[8:11], v0, s[0:1] ; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 ; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19] -; GISEL-NEXT: v_mov_b32_e32 v12, s2 +; GISEL-NEXT: v_mov_b32_e32 v16, s2 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2 +; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v0, v[14:17], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -1841,24 +1841,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_fp8__vgpr(ptr addrspace ; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: global_load_dwordx4 v[14:17], v0, s[0:1] +; GISEL-NEXT: global_load_dwordx4 v[8:11], v0, s[0:1] ; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 ; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19] -; GISEL-NEXT: v_mov_b32_e32 v12, s2 +; GISEL-NEXT: v_mov_b32_e32 v16, s2 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2 +; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v0, v[14:17], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/NVPTX/i32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i32x2-instructions.ll new file mode 100644 index 0000000..153ca10 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/i32x2-instructions.ll @@ -0,0 +1,1625 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mcpu=sm_80 -O0 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | FileCheck --check-prefixes=CHECK,CHECK-NOI32X2 %s +; RUN: %if ptxas-sm_80 %{ \ +; RUN: llc < %s -mcpu=sm_80 -O0 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | %ptxas-verify -arch=sm_80 \ +; RUN: %} +; RUN: llc < %s -mcpu=sm_100 -O0 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | FileCheck --check-prefixes=CHECK,CHECK-I32X2 %s +; RUN: %if ptxas-sm_100 %{ \ +; RUN: llc < %s -mcpu=sm_100 -O0 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | %ptxas-verify -arch=sm_100 \ +; RUN: %} + +target triple = "nvptx64-nvidia-cuda" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +define <2 x i32> @test_ret_const() #0 { +; CHECK-LABEL: test_ret_const( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {-1, 2}; +; CHECK-NEXT: ret; + ret <2 x i32> <i32 -1, i32 2> +} + +define i32 @test_extract_0(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_extract_0( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<3>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_0_param_0]; +; CHECK-NOI32X2-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_extract_0( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<2>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_extract_0_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, _}, %rd1; +; CHECK-I32X2-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-I32X2-NEXT: ret; + %e = extractelement <2 x i32> %a, i32 0 + ret i32 %e +} + +define i32 @test_extract_1(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_extract_1( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<3>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_1_param_0]; +; CHECK-NOI32X2-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_extract_1( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<2>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_extract_1_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {_, %r1}, %rd1; +; CHECK-I32X2-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-I32X2-NEXT: ret; + %e = extractelement <2 x i32> %a, i32 1 + ret i32 %e +} + +define i32 @test_extract_i(<2 x i32> %a, i64 %idx) #0 { +; CHECK-NOI32X2-LABEL: test_extract_i( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .local .align 8 .b8 __local_depot3[8]; +; CHECK-NOI32X2-NEXT: .reg .b64 %SP; +; CHECK-NOI32X2-NEXT: .reg .b64 %SPL; +; CHECK-NOI32X2-NEXT: .reg .b32 %r<4>; +; CHECK-NOI32X2-NEXT: .reg .b64 %rd<6>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: mov.b64 %SPL, __local_depot3; +; CHECK-NOI32X2-NEXT: cvta.local.u64 %SP, %SPL; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_i_param_0]; +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1]; +; CHECK-NOI32X2-NEXT: st.v2.b32 [%SP], {%r1, %r2}; +; CHECK-NOI32X2-NEXT: and.b64 %rd2, %rd1, 1; +; CHECK-NOI32X2-NEXT: shl.b64 %rd3, %rd2, 2; +; CHECK-NOI32X2-NEXT: add.u64 %rd4, %SP, 0; +; CHECK-NOI32X2-NEXT: or.b64 %rd5, %rd4, %rd3; +; CHECK-NOI32X2-NEXT: ld.b32 %r3, [%rd5]; +; CHECK-NOI32X2-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_extract_i( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .pred %p<2>; +; CHECK-I32X2-NEXT: .reg .b32 %r<4>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_extract_i_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_extract_i_param_0]; +; CHECK-I32X2-NEXT: setp.eq.b64 %p1, %rd2, 0; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: selp.b32 %r3, %r1, %r2, %p1; +; CHECK-I32X2-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-I32X2-NEXT: ret; + %e = extractelement <2 x i32> %a, i64 %idx + ret i32 %e +} + +define <2 x i32> @test_add(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_add( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_add_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_add_param_0]; +; CHECK-NOI32X2-NEXT: add.s32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: add.s32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_add( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_add_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_add_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: add.s32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: add.s32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %r = add <2 x i32> %a, %b + ret <2 x i32> %r +} + +define <2 x i32> @test_add_imm_0(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_add_imm_0( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_add_imm_0_param_0]; +; CHECK-NOI32X2-NEXT: add.s32 %r3, %r2, 2; +; CHECK-NOI32X2-NEXT: add.s32 %r4, %r1, 1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_add_imm_0( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_add_imm_0_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: add.s32 %r3, %r2, 2; +; CHECK-I32X2-NEXT: add.s32 %r4, %r1, 1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = add <2 x i32> <i32 1, i32 2>, %a + ret <2 x i32> %r +} + +define <2 x i32> @test_add_imm_1(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_add_imm_1( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_add_imm_1_param_0]; +; CHECK-NOI32X2-NEXT: add.s32 %r3, %r2, 2; +; CHECK-NOI32X2-NEXT: add.s32 %r4, %r1, 1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_add_imm_1( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_add_imm_1_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: add.s32 %r3, %r2, 2; +; CHECK-I32X2-NEXT: add.s32 %r4, %r1, 1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = add <2 x i32> %a, <i32 1, i32 2> + ret <2 x i32> %r +} + +define <2 x i32> @test_sub(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_sub( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_sub_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sub_param_0]; +; CHECK-NOI32X2-NEXT: sub.s32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: sub.s32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_sub( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_sub_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_sub_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: sub.s32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: sub.s32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %r = sub <2 x i32> %a, %b + ret <2 x i32> %r +} + +define <2 x i32> @test_smax(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_smax( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_smax_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_smax_param_0]; +; CHECK-NOI32X2-NEXT: max.s32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: max.s32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_smax( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_smax_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_smax_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: max.s32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: max.s32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %cmp = icmp sgt <2 x i32> %a, %b + %r = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %r +} + +define <2 x i32> @test_umax(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_umax( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_umax_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_umax_param_0]; +; CHECK-NOI32X2-NEXT: max.u32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: max.u32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_umax( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_umax_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_umax_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: max.u32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: max.u32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %cmp = icmp ugt <2 x i32> %a, %b + %r = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %r +} + +define <2 x i32> @test_smin(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_smin( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_smin_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_smin_param_0]; +; CHECK-NOI32X2-NEXT: min.s32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: min.s32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_smin( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_smin_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_smin_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: min.s32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: min.s32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %cmp = icmp sle <2 x i32> %a, %b + %r = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %r +} + +define <2 x i32> @test_umin(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_umin( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_umin_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_umin_param_0]; +; CHECK-NOI32X2-NEXT: min.u32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: min.u32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_umin( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_umin_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_umin_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: min.u32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: min.u32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %cmp = icmp ule <2 x i32> %a, %b + %r = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %r +} + +define <2 x i32> @test_eq(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +; CHECK-NOI32X2-LABEL: test_eq( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .pred %p<3>; +; CHECK-NOI32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_eq_param_2]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_eq_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_eq_param_0]; +; CHECK-NOI32X2-NEXT: setp.eq.b32 %p1, %r1, %r3; +; CHECK-NOI32X2-NEXT: setp.eq.b32 %p2, %r2, %r4; +; CHECK-NOI32X2-NEXT: selp.b32 %r7, %r2, %r6, %p2; +; CHECK-NOI32X2-NEXT: selp.b32 %r8, %r1, %r5, %p1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_eq( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .pred %p<3>; +; CHECK-I32X2-NEXT: .reg .b32 %r<9>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd3, [test_eq_param_2]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_eq_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_eq_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: setp.eq.b32 %p1, %r3, %r1; +; CHECK-I32X2-NEXT: setp.eq.b32 %p2, %r4, %r2; +; CHECK-I32X2-NEXT: mov.b64 {%r5, %r6}, %rd3; +; CHECK-I32X2-NEXT: selp.b32 %r7, %r4, %r6, %p2; +; CHECK-I32X2-NEXT: selp.b32 %r8, %r3, %r5, %p1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; +; CHECK-I32X2-NEXT: ret; + %cmp = icmp eq <2 x i32> %a, %b + %r = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %c + ret <2 x i32> %r +} + +define <2 x i32> @test_ne(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +; CHECK-NOI32X2-LABEL: test_ne( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .pred %p<3>; +; CHECK-NOI32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_ne_param_2]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_ne_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_ne_param_0]; +; CHECK-NOI32X2-NEXT: setp.ne.b32 %p1, %r1, %r3; +; CHECK-NOI32X2-NEXT: setp.ne.b32 %p2, %r2, %r4; +; CHECK-NOI32X2-NEXT: selp.b32 %r7, %r2, %r6, %p2; +; CHECK-NOI32X2-NEXT: selp.b32 %r8, %r1, %r5, %p1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_ne( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .pred %p<3>; +; CHECK-I32X2-NEXT: .reg .b32 %r<9>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd3, [test_ne_param_2]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_ne_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_ne_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: setp.ne.b32 %p1, %r3, %r1; +; CHECK-I32X2-NEXT: setp.ne.b32 %p2, %r4, %r2; +; CHECK-I32X2-NEXT: mov.b64 {%r5, %r6}, %rd3; +; CHECK-I32X2-NEXT: selp.b32 %r7, %r4, %r6, %p2; +; CHECK-I32X2-NEXT: selp.b32 %r8, %r3, %r5, %p1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; +; CHECK-I32X2-NEXT: ret; + %cmp = icmp ne <2 x i32> %a, %b + %r = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %c + ret <2 x i32> %r +} + +define <2 x i32> @test_mul(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_mul( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_mul_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_mul_param_0]; +; CHECK-NOI32X2-NEXT: mul.lo.s32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: mul.lo.s32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_mul( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_mul_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_mul_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: mul.lo.s32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: mul.lo.s32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %r = mul <2 x i32> %a, %b + ret <2 x i32> %r +} + +define <2 x i32> @test_or(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_or( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_or_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_or_param_0]; +; CHECK-NOI32X2-NEXT: or.b32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: or.b32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_or( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_or_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_or_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: or.b32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: or.b32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %r = or <2 x i32> %a, %b + ret <2 x i32> %r +} + +define <2 x i32> @test_or_computed(i32 %a) { +; CHECK-LABEL: test_or_computed( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_or_computed_param_0]; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, 5}; +; CHECK-NEXT: ret; + %ins.0 = insertelement <2 x i32> zeroinitializer, i32 %a, i32 0 + %ins.1 = insertelement <2 x i32> %ins.0, i32 5, i32 1 + %r = or <2 x i32> %ins.1, %ins.0 + ret <2 x i32> %r +} + +define <2 x i32> @test_or_imm_0(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_or_imm_0( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_or_imm_0_param_0]; +; CHECK-NOI32X2-NEXT: or.b32 %r3, %r2, 2; +; CHECK-NOI32X2-NEXT: or.b32 %r4, %r1, 1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_or_imm_0( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_or_imm_0_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: or.b32 %r3, %r2, 2; +; CHECK-I32X2-NEXT: or.b32 %r4, %r1, 1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = or <2 x i32> <i32 1, i32 2>, %a + ret <2 x i32> %r +} + +define <2 x i32> @test_or_imm_1(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_or_imm_1( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_or_imm_1_param_0]; +; CHECK-NOI32X2-NEXT: or.b32 %r3, %r2, 2; +; CHECK-NOI32X2-NEXT: or.b32 %r4, %r1, 1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_or_imm_1( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_or_imm_1_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: or.b32 %r3, %r2, 2; +; CHECK-I32X2-NEXT: or.b32 %r4, %r1, 1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = or <2 x i32> %a, <i32 1, i32 2> + ret <2 x i32> %r +} + +define <2 x i32> @test_xor(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_xor( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_xor_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_xor_param_0]; +; CHECK-NOI32X2-NEXT: xor.b32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: xor.b32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_xor( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_xor_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_xor_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: xor.b32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: xor.b32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %r = xor <2 x i32> %a, %b + ret <2 x i32> %r +} + +define <2 x i32> @test_xor_computed(i32 %a) { +; CHECK-LABEL: test_xor_computed( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_xor_computed_param_0]; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 5}; +; CHECK-NEXT: ret; + %ins.0 = insertelement <2 x i32> zeroinitializer, i32 %a, i32 0 + %ins.1 = insertelement <2 x i32> %ins.0, i32 5, i32 1 + %r = xor <2 x i32> %ins.1, %ins.0 + ret <2 x i32> %r +} + +define <2 x i32> @test_xor_imm_0(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_xor_imm_0( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_xor_imm_0_param_0]; +; CHECK-NOI32X2-NEXT: xor.b32 %r3, %r2, 2; +; CHECK-NOI32X2-NEXT: xor.b32 %r4, %r1, 1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_xor_imm_0( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_xor_imm_0_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: xor.b32 %r3, %r2, 2; +; CHECK-I32X2-NEXT: xor.b32 %r4, %r1, 1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = xor <2 x i32> <i32 1, i32 2>, %a + ret <2 x i32> %r +} + +define <2 x i32> @test_xor_imm_1(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_xor_imm_1( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_xor_imm_1_param_0]; +; CHECK-NOI32X2-NEXT: xor.b32 %r3, %r2, 2; +; CHECK-NOI32X2-NEXT: xor.b32 %r4, %r1, 1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_xor_imm_1( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_xor_imm_1_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: xor.b32 %r3, %r2, 2; +; CHECK-I32X2-NEXT: xor.b32 %r4, %r1, 1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = xor <2 x i32> %a, <i32 1, i32 2> + ret <2 x i32> %r +} + +define <2 x i32> @test_and(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_and( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_and_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_and_param_0]; +; CHECK-NOI32X2-NEXT: and.b32 %r5, %r2, %r4; +; CHECK-NOI32X2-NEXT: and.b32 %r6, %r1, %r3; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_and( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_and_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_and_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-I32X2-NEXT: and.b32 %r5, %r4, %r2; +; CHECK-I32X2-NEXT: and.b32 %r6, %r3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-I32X2-NEXT: ret; + %r = and <2 x i32> %a, %b + ret <2 x i32> %r +} + +define <2 x i32> @test_and_computed(i32 %a) { +; CHECK-LABEL: test_and_computed( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_and_computed_param_0]; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, 0}; +; CHECK-NEXT: ret; + %ins.0 = insertelement <2 x i32> zeroinitializer, i32 %a, i32 0 + %ins.1 = insertelement <2 x i32> %ins.0, i32 5, i32 1 + %r = and <2 x i32> %ins.1, %ins.0 + ret <2 x i32> %r +} + +define <2 x i32> @test_and_imm_0(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_and_imm_0( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_and_imm_0_param_0]; +; CHECK-NOI32X2-NEXT: and.b32 %r3, %r2, 2; +; CHECK-NOI32X2-NEXT: and.b32 %r4, %r1, 1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_and_imm_0( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_and_imm_0_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: and.b32 %r3, %r2, 2; +; CHECK-I32X2-NEXT: and.b32 %r4, %r1, 1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = and <2 x i32> <i32 1, i32 2>, %a + ret <2 x i32> %r +} + +define <2 x i32> @test_and_imm_1(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_and_imm_1( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_and_imm_1_param_0]; +; CHECK-NOI32X2-NEXT: and.b32 %r3, %r2, 2; +; CHECK-NOI32X2-NEXT: and.b32 %r4, %r1, 1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_and_imm_1( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_and_imm_1_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: and.b32 %r3, %r2, 2; +; CHECK-I32X2-NEXT: and.b32 %r4, %r1, 1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = and <2 x i32> %a, <i32 1, i32 2> + ret <2 x i32> %r +} + +define void @test_ldst_v2i32(ptr %a, ptr %b) { +; CHECK-NOI32X2-LABEL: test_ldst_v2i32( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<3>; +; CHECK-NOI32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v2i32_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v2i32_param_0]; +; CHECK-NOI32X2-NEXT: ld.v2.b32 {%r1, %r2}, [%rd1]; +; CHECK-NOI32X2-NEXT: st.v2.b32 [%rd2], {%r1, %r2}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_ldst_v2i32( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v2i32_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v2i32_param_0]; +; CHECK-I32X2-NEXT: ld.b64 %rd3, [%rd1]; +; CHECK-I32X2-NEXT: st.b64 [%rd2], %rd3; +; CHECK-I32X2-NEXT: ret; + %t1 = load <2 x i32>, ptr %a + store <2 x i32> %t1, ptr %b, align 16 + ret void +} + +define void @test_ldst_v3i32(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v3i32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_ldst_v3i32_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_ldst_v3i32_param_0]; +; CHECK-NEXT: ld.b64 %rd3, [%rd1]; +; CHECK-NEXT: ld.b32 %r1, [%rd1+8]; +; CHECK-NEXT: st.b32 [%rd2+8], %r1; +; CHECK-NEXT: st.b64 [%rd2], %rd3; +; CHECK-NEXT: ret; + %t1 = load <3 x i32>, ptr %a + store <3 x i32> %t1, ptr %b, align 16 + ret void +} + +define void @test_ldst_v4i32(ptr %a, ptr %b) { +; CHECK-NOI32X2-LABEL: test_ldst_v4i32( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v4i32_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v4i32_param_0]; +; CHECK-NOI32X2-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NOI32X2-NEXT: st.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_ldst_v4i32( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b64 %rd<5>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v4i32_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v4i32_param_0]; +; CHECK-I32X2-NEXT: ld.v2.b64 {%rd3, %rd4}, [%rd1]; +; CHECK-I32X2-NEXT: st.v2.b64 [%rd2], {%rd3, %rd4}; +; CHECK-I32X2-NEXT: ret; + %t1 = load <4 x i32>, ptr %a + store <4 x i32> %t1, ptr %b, align 16 + ret void +} + +define void @test_ldst_v2i32_unaligned(ptr %a, ptr %b) { +; CHECK-NOI32X2-LABEL: test_ldst_v2i32_unaligned( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<13>; +; CHECK-NOI32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v2i32_unaligned_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v2i32_unaligned_param_0]; +; CHECK-NOI32X2-NEXT: ld.b8 %r1, [%rd1+2]; +; CHECK-NOI32X2-NEXT: shl.b32 %r2, %r1, 16; +; CHECK-NOI32X2-NEXT: ld.b8 %r3, [%rd1+3]; +; CHECK-NOI32X2-NEXT: shl.b32 %r4, %r3, 24; +; CHECK-NOI32X2-NEXT: or.b32 %r5, %r4, %r2; +; CHECK-NOI32X2-NEXT: ld.b8 %r6, [%rd1]; +; CHECK-NOI32X2-NEXT: ld.b8 %r7, [%rd1+1]; +; CHECK-NOI32X2-NEXT: ld.b8 %r8, [%rd1+4]; +; CHECK-NOI32X2-NEXT: ld.b8 %r9, [%rd1+5]; +; CHECK-NOI32X2-NEXT: ld.b8 %r10, [%rd1+6]; +; CHECK-NOI32X2-NEXT: ld.b8 %r11, [%rd1+7]; +; CHECK-NOI32X2-NEXT: st.b8 [%rd2+7], %r11; +; CHECK-NOI32X2-NEXT: st.b8 [%rd2+6], %r10; +; CHECK-NOI32X2-NEXT: st.b8 [%rd2+5], %r9; +; CHECK-NOI32X2-NEXT: st.b8 [%rd2+4], %r8; +; CHECK-NOI32X2-NEXT: st.b8 [%rd2+1], %r7; +; CHECK-NOI32X2-NEXT: st.b8 [%rd2], %r6; +; CHECK-NOI32X2-NEXT: st.b8 [%rd2+3], %r3; +; CHECK-NOI32X2-NEXT: shr.u32 %r12, %r5, 16; +; CHECK-NOI32X2-NEXT: st.b8 [%rd2+2], %r12; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_ldst_v2i32_unaligned( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b64 %rd<28>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v2i32_unaligned_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v2i32_unaligned_param_0]; +; CHECK-I32X2-NEXT: ld.b8 %rd3, [%rd1]; +; CHECK-I32X2-NEXT: ld.b8 %rd4, [%rd1+1]; +; CHECK-I32X2-NEXT: shl.b64 %rd5, %rd4, 8; +; CHECK-I32X2-NEXT: or.b64 %rd6, %rd5, %rd3; +; CHECK-I32X2-NEXT: ld.b8 %rd7, [%rd1+2]; +; CHECK-I32X2-NEXT: shl.b64 %rd8, %rd7, 16; +; CHECK-I32X2-NEXT: ld.b8 %rd9, [%rd1+3]; +; CHECK-I32X2-NEXT: shl.b64 %rd10, %rd9, 24; +; CHECK-I32X2-NEXT: or.b64 %rd11, %rd10, %rd8; +; CHECK-I32X2-NEXT: or.b64 %rd12, %rd11, %rd6; +; CHECK-I32X2-NEXT: ld.b8 %rd13, [%rd1+4]; +; CHECK-I32X2-NEXT: ld.b8 %rd14, [%rd1+5]; +; CHECK-I32X2-NEXT: shl.b64 %rd15, %rd14, 8; +; CHECK-I32X2-NEXT: or.b64 %rd16, %rd15, %rd13; +; CHECK-I32X2-NEXT: ld.b8 %rd17, [%rd1+6]; +; CHECK-I32X2-NEXT: shl.b64 %rd18, %rd17, 16; +; CHECK-I32X2-NEXT: ld.b8 %rd19, [%rd1+7]; +; CHECK-I32X2-NEXT: shl.b64 %rd20, %rd19, 24; +; CHECK-I32X2-NEXT: or.b64 %rd21, %rd20, %rd18; +; CHECK-I32X2-NEXT: or.b64 %rd22, %rd21, %rd16; +; CHECK-I32X2-NEXT: shl.b64 %rd23, %rd22, 32; +; CHECK-I32X2-NEXT: or.b64 %rd24, %rd23, %rd12; +; CHECK-I32X2-NEXT: st.b8 [%rd2+6], %rd17; +; CHECK-I32X2-NEXT: shr.u64 %rd25, %rd24, 56; +; CHECK-I32X2-NEXT: st.b8 [%rd2+7], %rd25; +; CHECK-I32X2-NEXT: st.b8 [%rd2+4], %rd13; +; CHECK-I32X2-NEXT: shr.u64 %rd26, %rd24, 40; +; CHECK-I32X2-NEXT: st.b8 [%rd2+5], %rd26; +; CHECK-I32X2-NEXT: st.b8 [%rd2+1], %rd4; +; CHECK-I32X2-NEXT: st.b8 [%rd2], %rd3; +; CHECK-I32X2-NEXT: st.b8 [%rd2+3], %rd9; +; CHECK-I32X2-NEXT: shr.u64 %rd27, %rd24, 16; +; CHECK-I32X2-NEXT: st.b8 [%rd2+2], %rd27; +; CHECK-I32X2-NEXT: ret; + %t1 = load <2 x i32>, ptr %a, align 1 + store <2 x i32> %t1, ptr %b, align 1 + ret void +} + +declare <2 x i32> @test_callee(<2 x i32> %a, <2 x i32> %b) #0 + +define <2 x i32> @test_call(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_call( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_call_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_call_param_0]; +; CHECK-NOI32X2-NEXT: { // callseq 0, 0 +; CHECK-NOI32X2-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-NOI32X2-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NOI32X2-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [param1], {%r3, %r4}; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; +; CHECK-NOI32X2-NEXT: call.uni (retval0), test_callee, (param0, param1); +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [retval0]; +; CHECK-NOI32X2-NEXT: } // callseq 0 +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r5, %r6}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_call( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_call_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_call_param_0]; +; CHECK-I32X2-NEXT: { // callseq 0, 0 +; CHECK-I32X2-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-I32X2-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-I32X2-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-I32X2-NEXT: st.param.b64 [param1], %rd2; +; CHECK-I32X2-NEXT: st.param.b64 [param0], %rd1; +; CHECK-I32X2-NEXT: call.uni (retval0), test_callee, (param0, param1); +; CHECK-I32X2-NEXT: ld.param.b64 %rd3, [retval0]; +; CHECK-I32X2-NEXT: } // callseq 0 +; CHECK-I32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-I32X2-NEXT: ret; + %r = call <2 x i32> @test_callee(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %r +} + +define <2 x i32> @test_call_flipped(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_call_flipped( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_call_flipped_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_call_flipped_param_0]; +; CHECK-NOI32X2-NEXT: { // callseq 1, 0 +; CHECK-NOI32X2-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-NOI32X2-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NOI32X2-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [param1], {%r1, %r2}; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [param0], {%r3, %r4}; +; CHECK-NOI32X2-NEXT: call.uni (retval0), test_callee, (param0, param1); +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [retval0]; +; CHECK-NOI32X2-NEXT: } // callseq 1 +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r5, %r6}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_call_flipped( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_call_flipped_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_call_flipped_param_0]; +; CHECK-I32X2-NEXT: { // callseq 1, 0 +; CHECK-I32X2-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-I32X2-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-I32X2-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-I32X2-NEXT: st.param.b64 [param1], %rd1; +; CHECK-I32X2-NEXT: st.param.b64 [param0], %rd2; +; CHECK-I32X2-NEXT: call.uni (retval0), test_callee, (param0, param1); +; CHECK-I32X2-NEXT: ld.param.b64 %rd3, [retval0]; +; CHECK-I32X2-NEXT: } // callseq 1 +; CHECK-I32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-I32X2-NEXT: ret; + %r = call <2 x i32> @test_callee(<2 x i32> %b, <2 x i32> %a) + ret <2 x i32> %r +} + +define <2 x i32> @test_tailcall_flipped(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_tailcall_flipped( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_tailcall_flipped_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_tailcall_flipped_param_0]; +; CHECK-NOI32X2-NEXT: { // callseq 2, 0 +; CHECK-NOI32X2-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-NOI32X2-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NOI32X2-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [param1], {%r1, %r2}; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [param0], {%r3, %r4}; +; CHECK-NOI32X2-NEXT: call.uni (retval0), test_callee, (param0, param1); +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [retval0]; +; CHECK-NOI32X2-NEXT: } // callseq 2 +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r5, %r6}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_tailcall_flipped( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_tailcall_flipped_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_tailcall_flipped_param_0]; +; CHECK-I32X2-NEXT: { // callseq 2, 0 +; CHECK-I32X2-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-I32X2-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-I32X2-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-I32X2-NEXT: st.param.b64 [param1], %rd1; +; CHECK-I32X2-NEXT: st.param.b64 [param0], %rd2; +; CHECK-I32X2-NEXT: call.uni (retval0), test_callee, (param0, param1); +; CHECK-I32X2-NEXT: ld.param.b64 %rd3, [retval0]; +; CHECK-I32X2-NEXT: } // callseq 2 +; CHECK-I32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-I32X2-NEXT: ret; + %r = tail call <2 x i32> @test_callee(<2 x i32> %b, <2 x i32> %a) + ret <2 x i32> %r +} + +define <2 x i32> @test_select(<2 x i32> %a, <2 x i32> %b, i1 zeroext %c) #0 { +; CHECK-NOI32X2-LABEL: test_select( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .pred %p<2>; +; CHECK-NOI32X2-NEXT: .reg .b16 %rs<3>; +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.b8 %rs1, [test_select_param_2]; +; CHECK-NOI32X2-NEXT: and.b16 %rs2, %rs1, 1; +; CHECK-NOI32X2-NEXT: setp.ne.b16 %p1, %rs2, 0; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_param_0]; +; CHECK-NOI32X2-NEXT: selp.b32 %r5, %r2, %r4, %p1; +; CHECK-NOI32X2-NEXT: selp.b32 %r6, %r1, %r3, %p1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_select( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .pred %p<2>; +; CHECK-I32X2-NEXT: .reg .b16 %rs<3>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b8 %rs1, [test_select_param_2]; +; CHECK-I32X2-NEXT: and.b16 %rs2, %rs1, 1; +; CHECK-I32X2-NEXT: setp.ne.b16 %p1, %rs2, 0; +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_select_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_select_param_0]; +; CHECK-I32X2-NEXT: selp.b64 %rd3, %rd1, %rd2, %p1; +; CHECK-I32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-I32X2-NEXT: ret; + %r = select i1 %c, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %r +} + +define <2 x i32> @test_select_cc(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) #0 { +; CHECK-NOI32X2-LABEL: test_select_cc( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .pred %p<3>; +; CHECK-NOI32X2-NEXT: .reg .b32 %r<11>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r7, %r8}, [test_select_cc_param_3]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_param_2]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_param_0]; +; CHECK-NOI32X2-NEXT: setp.ne.b32 %p1, %r5, %r7; +; CHECK-NOI32X2-NEXT: setp.ne.b32 %p2, %r6, %r8; +; CHECK-NOI32X2-NEXT: selp.b32 %r9, %r2, %r4, %p2; +; CHECK-NOI32X2-NEXT: selp.b32 %r10, %r1, %r3, %p1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r10, %r9}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_select_cc( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .pred %p<3>; +; CHECK-I32X2-NEXT: .reg .b32 %r<11>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<5>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd4, [test_select_cc_param_3]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd3, [test_select_cc_param_2]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_select_cc_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_select_cc_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd4; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd3; +; CHECK-I32X2-NEXT: setp.ne.b32 %p1, %r3, %r1; +; CHECK-I32X2-NEXT: setp.ne.b32 %p2, %r4, %r2; +; CHECK-I32X2-NEXT: mov.b64 {%r5, %r6}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {%r7, %r8}, %rd1; +; CHECK-I32X2-NEXT: selp.b32 %r9, %r8, %r6, %p2; +; CHECK-I32X2-NEXT: selp.b32 %r10, %r7, %r5, %p1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r10, %r9}; +; CHECK-I32X2-NEXT: ret; + %cc = icmp ne <2 x i32> %c, %d + %r = select <2 x i1> %cc, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %r +} + +define <2 x i16> @test_trunc_2xi32(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_trunc_2xi32( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<4>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_2xi32_param_0]; +; CHECK-NOI32X2-NEXT: prmt.b32 %r3, %r1, %r2, 0x5410U; +; CHECK-NOI32X2-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_trunc_2xi32( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_trunc_2xi32_param_0]; +; CHECK-I32X2-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-I32X2-NEXT: ret; + %r = trunc <2 x i32> %a to <2 x i16> + ret <2 x i16> %r +} + +define <2 x i32> @test_trunc_2xi64(<2 x i64> %a) #0 { +; CHECK-LABEL: test_trunc_2xi64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_trunc_2xi64_param_0]; +; CHECK-NEXT: cvt.u32.u64 %r1, %rd2; +; CHECK-NEXT: cvt.u32.u64 %r2, %rd1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-NEXT: ret; + %r = trunc <2 x i64> %a to <2 x i32> + ret <2 x i32> %r +} + +define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 { +; CHECK-LABEL: test_zext_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_zext_2xi32_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: cvt.u32.u16 %r2, %rs2; +; CHECK-NEXT: cvt.u32.u16 %r3, %rs1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; +; CHECK-NEXT: ret; + %r = zext <2 x i16> %a to <2 x i32> + ret <2 x i32> %r +} + +define <2 x i64> @test_zext_2xi64(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_zext_2xi64( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<3>; +; CHECK-NOI32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_zext_2xi64_param_0]; +; CHECK-NOI32X2-NEXT: cvt.u64.u32 %rd1, %r2; +; CHECK-NOI32X2-NEXT: cvt.u64.u32 %rd2, %r1; +; CHECK-NOI32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_zext_2xi64( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<3>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_zext_2xi64_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: cvt.u64.u32 %rd2, %r2; +; CHECK-I32X2-NEXT: cvt.u64.u32 %rd3, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; +; CHECK-I32X2-NEXT: ret; + %r = zext <2 x i32> %a to <2 x i64> + ret <2 x i64> %r +} + +define <2 x i32> @test_bitcast_i64_to_2xi32(i64 %a) #0 { +; CHECK-LABEL: test_bitcast_i64_to_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_bitcast_i64_to_2xi32_param_0]; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %r = bitcast i64 %a to <2 x i32> + ret <2 x i32> %r +} + +define <2 x i32> @test_bitcast_double_to_2xi32(double %a) #0 { +; CHECK-LABEL: test_bitcast_double_to_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_bitcast_double_to_2xi32_param_0]; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %r = bitcast double %a to <2 x i32> + ret <2 x i32> %r +} + +define i64 @test_bitcast_2xi32_to_i64(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_bitcast_2xi32_to_i64( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<3>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_bitcast_2xi32_to_i64_param_0]; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r2}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_bitcast_2xi32_to_i64( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_bitcast_2xi32_to_i64_param_0]; +; CHECK-I32X2-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-I32X2-NEXT: ret; + %r = bitcast <2 x i32> %a to i64 + ret i64 %r +} + +define double @test_bitcast_2xi32_to_double(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_bitcast_2xi32_to_double( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<3>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_bitcast_2xi32_to_double_param_0]; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r2}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_bitcast_2xi32_to_double( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_bitcast_2xi32_to_double_param_0]; +; CHECK-I32X2-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-I32X2-NEXT: ret; + %r = bitcast <2 x i32> %a to double + ret double %r +} + + +define <4 x half> @test_bitcast_2xi32_to_4xhalf(i32 %a) #0 { +; CHECK-LABEL: test_bitcast_2xi32_to_4xhalf( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_bitcast_2xi32_to_4xhalf_param_0]; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, 5}; +; CHECK-NEXT: ret; + %ins.0 = insertelement <2 x i32> poison, i32 %a, i32 0 + %ins.1 = insertelement <2 x i32> %ins.0, i32 5, i32 1 + %r = bitcast <2 x i32> %ins.1 to <4 x half> + ret <4 x half> %r +} + + +define <2 x i32> @test_shufflevector(<2 x i32> %a) #0 { +; CHECK-NOI32X2-LABEL: test_shufflevector( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<3>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_shufflevector_param_0]; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_shufflevector( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<3>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_shufflevector_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-I32X2-NEXT: ret; + %s = shufflevector <2 x i32> %a, <2 x i32> poison, <2 x i32> <i32 1, i32 0> + ret <2 x i32> %s +} + +define <2 x i32> @test_shufflevector_2(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-NOI32X2-LABEL: test_shufflevector_2( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_shufflevector_2_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_shufflevector_2_param_0]; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r4}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_shufflevector_2( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<3>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_shufflevector_2_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_shufflevector_2_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {_, %r1}, %rd2; +; CHECK-I32X2-NEXT: mov.b64 {_, %r2}, %rd1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-I32X2-NEXT: ret; + %s = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i32> %s +} + + +define <2 x i32> @test_insertelement(<2 x i32> %a, i32 %x) #0 { +; CHECK-NOI32X2-LABEL: test_insertelement( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<4>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_insertelement_param_0]; +; CHECK-NOI32X2-NEXT: ld.param.b32 %r3, [test_insertelement_param_1]; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_insertelement( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<3>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b32 %r1, [test_insertelement_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_insertelement_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r2, _}, %rd1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-I32X2-NEXT: ret; + %i = insertelement <2 x i32> %a, i32 %x, i64 1 + ret <2 x i32> %i +} + +define <2 x i32> @test_fptosi_2xhalf_to_2xi32(<2 x half> %a) #0 { +; CHECK-LABEL: test_fptosi_2xhalf_to_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_fptosi_2xhalf_to_2xi32_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: cvt.rzi.s32.f16 %r2, %rs2; +; CHECK-NEXT: cvt.rzi.s32.f16 %r3, %rs1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; +; CHECK-NEXT: ret; + %r = fptosi <2 x half> %a to <2 x i32> + ret <2 x i32> %r +} + +define <2 x i32> @test_fptoui_2xhalf_to_2xi32(<2 x half> %a) #0 { +; CHECK-LABEL: test_fptoui_2xhalf_to_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_fptoui_2xhalf_to_2xi32_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: cvt.rzi.u32.f16 %r2, %rs2; +; CHECK-NEXT: cvt.rzi.u32.f16 %r3, %rs1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; +; CHECK-NEXT: ret; + %r = fptoui <2 x half> %a to <2 x i32> + ret <2 x i32> %r +} + +define void @test_srem_v2i32(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_srem_v2i32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.b64 %rd3, [test_srem_v2i32_param_2]; +; CHECK-NEXT: ld.param.b64 %rd2, [test_srem_v2i32_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_srem_v2i32_param_0]; +; CHECK-NEXT: ld.v2.b32 {%r1, %r2}, [%rd1]; +; CHECK-NEXT: ld.v2.b32 {%r3, %r4}, [%rd2]; +; CHECK-NEXT: rem.s32 %r5, %r2, %r4; +; CHECK-NEXT: rem.s32 %r6, %r1, %r3; +; CHECK-NEXT: st.v2.b32 [%rd3], {%r6, %r5}; +; CHECK-NEXT: ret; +entry: + %t57 = load <2 x i32>, ptr %a, align 8 + %t59 = load <2 x i32>, ptr %b, align 8 + %x = srem <2 x i32> %t57, %t59 + store <2 x i32> %x, ptr %c, align 8 + ret void +} + +define void @test_srem_v3i32(ptr %a, ptr %b, ptr %c) { +; CHECK-NOI32X2-LABEL: test_srem_v3i32( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<10>; +; CHECK-NOI32X2-NEXT: .reg .b64 %rd<10>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: // %entry +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd3, [test_srem_v3i32_param_2]; +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd2, [test_srem_v3i32_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd1, [test_srem_v3i32_param_0]; +; CHECK-NOI32X2-NEXT: ld.b32 %r1, [%rd1+8]; +; CHECK-NOI32X2-NEXT: ld.b64 %rd4, [%rd1]; +; CHECK-NOI32X2-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %r2}, %rd4; } +; CHECK-NOI32X2-NEXT: cvt.u32.u64 %r3, %rd4; +; CHECK-NOI32X2-NEXT: ld.b32 %r4, [%rd2+8]; +; CHECK-NOI32X2-NEXT: ld.b64 %rd5, [%rd2]; +; CHECK-NOI32X2-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %r5}, %rd5; } +; CHECK-NOI32X2-NEXT: cvt.u32.u64 %r6, %rd5; +; CHECK-NOI32X2-NEXT: rem.s32 %r7, %r3, %r6; +; CHECK-NOI32X2-NEXT: cvt.u64.u32 %rd6, %r7; +; CHECK-NOI32X2-NEXT: rem.s32 %r8, %r2, %r5; +; CHECK-NOI32X2-NEXT: cvt.u64.u32 %rd7, %r8; +; CHECK-NOI32X2-NEXT: shl.b64 %rd8, %rd7, 32; +; CHECK-NOI32X2-NEXT: or.b64 %rd9, %rd6, %rd8; +; CHECK-NOI32X2-NEXT: rem.s32 %r9, %r1, %r4; +; CHECK-NOI32X2-NEXT: st.b32 [%rd3+8], %r9; +; CHECK-NOI32X2-NEXT: st.b64 [%rd3], %rd9; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_srem_v3i32( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<10>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: // %entry +; CHECK-I32X2-NEXT: ld.param.b64 %rd3, [test_srem_v3i32_param_2]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_srem_v3i32_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_srem_v3i32_param_0]; +; CHECK-I32X2-NEXT: ld.v2.b32 {%r1, %r2}, [%rd1]; +; CHECK-I32X2-NEXT: ld.b32 %r3, [%rd1+8]; +; CHECK-I32X2-NEXT: ld.v2.b32 {%r4, %r5}, [%rd2]; +; CHECK-I32X2-NEXT: ld.b32 %r6, [%rd2+8]; +; CHECK-I32X2-NEXT: rem.s32 %r7, %r3, %r6; +; CHECK-I32X2-NEXT: rem.s32 %r8, %r2, %r5; +; CHECK-I32X2-NEXT: rem.s32 %r9, %r1, %r4; +; CHECK-I32X2-NEXT: st.v2.b32 [%rd3], {%r9, %r8}; +; CHECK-I32X2-NEXT: st.b32 [%rd3+8], %r7; +; CHECK-I32X2-NEXT: ret; +entry: + %t57 = load <3 x i32>, ptr %a, align 8 + %t59 = load <3 x i32>, ptr %b, align 8 + %x = srem <3 x i32> %t57, %t59 + store <3 x i32> %x, ptr %c, align 8 + ret void +} + +define void @test_sext_v2i1_to_v2i32(ptr %a, ptr %b, ptr %c) { +; CHECK-NOI32X2-LABEL: test_sext_v2i1_to_v2i32( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .pred %p<3>; +; CHECK-NOI32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOI32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: // %entry +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd3, [test_sext_v2i1_to_v2i32_param_2]; +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd2, [test_sext_v2i1_to_v2i32_param_1]; +; CHECK-NOI32X2-NEXT: ld.param.b64 %rd1, [test_sext_v2i1_to_v2i32_param_0]; +; CHECK-NOI32X2-NEXT: ld.b32 %r1, [%rd1]; +; CHECK-NOI32X2-NEXT: ld.b32 %r2, [%rd1+4]; +; CHECK-NOI32X2-NEXT: ld.b32 %r3, [%rd2]; +; CHECK-NOI32X2-NEXT: ld.b32 %r4, [%rd2+4]; +; CHECK-NOI32X2-NEXT: setp.gt.u32 %p1, %r2, %r4; +; CHECK-NOI32X2-NEXT: setp.gt.u32 %p2, %r1, %r3; +; CHECK-NOI32X2-NEXT: selp.b32 %r5, -1, 0, %p2; +; CHECK-NOI32X2-NEXT: selp.b32 %r6, -1, 0, %p1; +; CHECK-NOI32X2-NEXT: st.b32 [%rd3+4], %r6; +; CHECK-NOI32X2-NEXT: st.b32 [%rd3], %r5; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_sext_v2i1_to_v2i32( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .pred %p<3>; +; CHECK-I32X2-NEXT: .reg .b32 %r<7>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<14>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: // %entry +; CHECK-I32X2-NEXT: ld.param.b64 %rd3, [test_sext_v2i1_to_v2i32_param_2]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd2, [test_sext_v2i1_to_v2i32_param_1]; +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_sext_v2i1_to_v2i32_param_0]; +; CHECK-I32X2-NEXT: ld.b32 %rd4, [%rd1]; +; CHECK-I32X2-NEXT: ld.b32 %rd5, [%rd1+4]; +; CHECK-I32X2-NEXT: shl.b64 %rd6, %rd5, 32; +; CHECK-I32X2-NEXT: or.b64 %rd7, %rd6, %rd4; +; CHECK-I32X2-NEXT: ld.b32 %rd8, [%rd2]; +; CHECK-I32X2-NEXT: ld.b32 %rd9, [%rd2+4]; +; CHECK-I32X2-NEXT: shl.b64 %rd10, %rd9, 32; +; CHECK-I32X2-NEXT: or.b64 %rd11, %rd10, %rd8; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd11; +; CHECK-I32X2-NEXT: mov.b64 {%r3, %r4}, %rd7; +; CHECK-I32X2-NEXT: setp.gt.u32 %p1, %r3, %r1; +; CHECK-I32X2-NEXT: setp.gt.u32 %p2, %r4, %r2; +; CHECK-I32X2-NEXT: selp.b32 %r5, -1, 0, %p2; +; CHECK-I32X2-NEXT: selp.b32 %r6, -1, 0, %p1; +; CHECK-I32X2-NEXT: mov.b64 %rd12, {%r6, %r5}; +; CHECK-I32X2-NEXT: st.b32 [%rd3], %rd12; +; CHECK-I32X2-NEXT: shr.u64 %rd13, %rd12, 32; +; CHECK-I32X2-NEXT: st.b32 [%rd3+4], %rd13; +; CHECK-I32X2-NEXT: ret; +entry: + %t1 = load <2 x i32>, ptr %a, align 4 + %t2 = load <2 x i32>, ptr %b, align 4 + %t5 = icmp ugt <2 x i32> %t1, %t2 + %t6 = sext <2 x i1> %t5 to <2 x i32> + store <2 x i32> %t6, ptr %c, align 4 + ret void +} + +define <2 x float> @test_uitofp_v2i32(<2 x i32> %a) { +; CHECK-NOI32X2-LABEL: test_uitofp_v2i32( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_v2i32_param_0]; +; CHECK-NOI32X2-NEXT: cvt.rn.f32.u32 %r3, %r2; +; CHECK-NOI32X2-NEXT: cvt.rn.f32.u32 %r4, %r1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_uitofp_v2i32( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_uitofp_v2i32_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: cvt.rn.f32.u32 %r3, %r2; +; CHECK-I32X2-NEXT: cvt.rn.f32.u32 %r4, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = uitofp <2 x i32> %a to <2 x float> + ret <2 x float> %r +} + +define <2 x float> @test_sitofp_v2i32(<2 x i32> %a) { +; CHECK-NOI32X2-LABEL: test_sitofp_v2i32( +; CHECK-NOI32X2: { +; CHECK-NOI32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOI32X2-EMPTY: +; CHECK-NOI32X2-NEXT: // %bb.0: +; CHECK-NOI32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sitofp_v2i32_param_0]; +; CHECK-NOI32X2-NEXT: cvt.rn.f32.s32 %r3, %r2; +; CHECK-NOI32X2-NEXT: cvt.rn.f32.s32 %r4, %r1; +; CHECK-NOI32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOI32X2-NEXT: ret; +; +; CHECK-I32X2-LABEL: test_sitofp_v2i32( +; CHECK-I32X2: { +; CHECK-I32X2-NEXT: .reg .b32 %r<5>; +; CHECK-I32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-I32X2-EMPTY: +; CHECK-I32X2-NEXT: // %bb.0: +; CHECK-I32X2-NEXT: ld.param.b64 %rd1, [test_sitofp_v2i32_param_0]; +; CHECK-I32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-I32X2-NEXT: cvt.rn.f32.s32 %r3, %r2; +; CHECK-I32X2-NEXT: cvt.rn.f32.s32 %r4, %r1; +; CHECK-I32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-I32X2-NEXT: ret; + %r = sitofp <2 x i32> %a to <2 x float> + ret <2 x float> %r +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 1a7a72d..693a40d 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -142,6 +142,7 @@ ; CHECK-NEXT: shvstvecd - 'Shvstvecd' (vstvec supports Direct mode). ; CHECK-NEXT: shxadd-load-fusion - Enable SH(1|2|3)ADD(.UW) + load macrofusion. ; CHECK-NEXT: sifive7 - SiFive 7-Series processors. +; CHECK-NEXT: single-element-vec-fp64 - Certain vector FP64 operations produce a single result element per cycle. ; CHECK-NEXT: smaia - 'Smaia' (Advanced Interrupt Architecture Machine Level). ; CHECK-NEXT: smcdeleg - 'Smcdeleg' (Counter Delegation Machine Level). ; CHECK-NEXT: smcntrpmf - 'Smcntrpmf' (Cycle and Instret Privilege Mode Filtering). diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll index d6252cc..150bef0 100644 --- a/llvm/test/CodeGen/X86/fmaxnum.ll +++ b/llvm/test/CodeGen/X86/fmaxnum.ll @@ -645,11 +645,47 @@ define float @test_maxnum_const_op2(float %x) { ret float %r } -define float @test_maxnum_const_nan(float %x) { -; CHECK-LABEL: test_maxnum_const_nan: -; CHECK: # %bb.0: -; CHECK-NEXT: retq - %r = call float @llvm.maxnum.f32(float %x, float 0x7fff000000000000) +define float @test_maxnum_const_nan(float %x, float %y) { +; SSE-LABEL: test_maxnum_const_nan: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_maxnum_const_nan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq + %r = call float @llvm.maxnum.f32(float %y, float 0x7fff000000000000) + ret float %r +} + +; nnan maxnum(Y, -inf) -> Y +define float @test_maxnum_neg_inf_nnan(float %x, float %y) nounwind { +; SSE-LABEL: test_maxnum_neg_inf_nnan: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_maxnum_neg_inf_nnan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq + %r = call nnan float @llvm.maxnum.f32(float %y, float 0xfff0000000000000) + ret float %r +} + +; Test SNaN quieting +define float @test_maxnum_snan(float %x) { +; SSE-LABEL: test_maxnum_snan: +; SSE: # %bb.0: +; SSE-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; SSE-NEXT: retq +; +; AVX-LABEL: test_maxnum_snan: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: retq + %r = call float @llvm.maxnum.f32(float 0x7ff4000000000000, float %x) ret float %r } diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll index 864c233..06515e4 100644 --- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll @@ -2649,3 +2649,102 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) { %r = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) ret <4 x bfloat> %r } + +; nnan minimum(Y, +inf) -> Y +define float @test_fminimum_inf_nnan(float %x, float %y) nounwind { +; SSE2-LABEL: test_fminimum_inf_nnan: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fminimum_inf_nnan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq +; +; AVX10_2-LABEL: test_fminimum_inf_nnan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovaps %xmm1, %xmm0 +; AVX10_2-NEXT: retq +; +; X86-LABEL: test_fminimum_inf_nnan: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: retl + %1 = call nnan float @llvm.minimum.f32(float %y, float 0x7ff0000000000000) + ret float %1 +} + +; nnan maximum(Y, -inf) -> Y +define float @test_fmaximum_neg_inf_nnan(float %x, float %y) nounwind { +; SSE2-LABEL: test_fmaximum_neg_inf_nnan: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fmaximum_neg_inf_nnan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq +; +; AVX10_2-LABEL: test_fmaximum_neg_inf_nnan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovaps %xmm1, %xmm0 +; AVX10_2-NEXT: retq +; +; X86-LABEL: test_fmaximum_neg_inf_nnan: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: retl + %1 = call nnan float @llvm.maximum.f32(float %y, float 0xfff0000000000000) + ret float %1 +} + +; Test SNaN quieting +define float @test_fmaximum_snan(float %x) { +; SSE2-LABEL: test_fmaximum_snan: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fmaximum_snan: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: retq +; +; AVX10_2-LABEL: test_fmaximum_snan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX10_2-NEXT: retq +; +; X86-LABEL: test_fmaximum_snan: +; X86: # %bb.0: +; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NEXT: retl + %1 = tail call float @llvm.maximum.f32(float 0x7ff4000000000000, float %x) + ret float %1 +} + +define float @test_fminimum_snan(float %x) { +; SSE2-LABEL: test_fminimum_snan: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fminimum_snan: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: retq +; +; AVX10_2-LABEL: test_fminimum_snan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX10_2-NEXT: retq +; +; X86-LABEL: test_fminimum_snan: +; X86: # %bb.0: +; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X86-NEXT: retl + %1 = tail call float @llvm.minimum.f32(float 0x7ff4000000000000, float %x) + ret float %1 +} diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll index c66473e..0fe107c 100644 --- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll +++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll @@ -2479,3 +2479,102 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n %r = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) ret <4 x bfloat> %r } + +; nnan minimumnum(Y, +inf) -> Y +define float @test_fminimumnum_inf_nnan(float %x, float %y) nounwind { +; SSE2-LABEL: test_fminimumnum_inf_nnan: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fminimumnum_inf_nnan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq +; +; AVX10_2-LABEL: test_fminimumnum_inf_nnan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovaps %xmm1, %xmm0 +; AVX10_2-NEXT: retq +; +; X86-LABEL: test_fminimumnum_inf_nnan: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: retl + %1 = call nnan float @llvm.minimumnum.f32(float %y, float 0x7ff0000000000000) + ret float %1 +} + +; nnan maximumnum(Y, -inf) -> Y +define float @test_fmaximumnum_neg_inf_nnan(float %x, float %y) nounwind { +; SSE2-LABEL: test_fmaximumnum_neg_inf_nnan: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fmaximumnum_neg_inf_nnan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq +; +; AVX10_2-LABEL: test_fmaximumnum_neg_inf_nnan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovaps %xmm1, %xmm0 +; AVX10_2-NEXT: retq +; +; X86-LABEL: test_fmaximumnum_neg_inf_nnan: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: retl + %1 = call nnan float @llvm.maximumnum.f32(float %y, float 0xfff0000000000000) + ret float %1 +} + +; Test we propagate the non-NaN arg, even if one arg is SNaN +define float @test_fmaximumnum_snan(float %x, float %y) { +; SSE2-LABEL: test_fmaximumnum_snan: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fmaximumnum_snan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq +; +; AVX10_2-LABEL: test_fmaximumnum_snan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovaps %xmm1, %xmm0 +; AVX10_2-NEXT: retq +; +; X86-LABEL: test_fmaximumnum_snan: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: retl + %1 = tail call float @llvm.maximumnum.f32(float 0x7ff4000000000000, float %y) + ret float %1 +} + +define float @test_fminimumnum_snan(float %x, float %y) { +; SSE2-LABEL: test_fminimumnum_snan: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fminimumnum_snan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq +; +; AVX10_2-LABEL: test_fminimumnum_snan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovaps %xmm1, %xmm0 +; AVX10_2-NEXT: retq +; +; X86-LABEL: test_fminimumnum_snan: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: retl + %1 = tail call float @llvm.minimumnum.f32(float 0x7ff4000000000000, float %y) + ret float %1 +} diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll index 0ef8fde..4aa1a61 100644 --- a/llvm/test/CodeGen/X86/fminnum.ll +++ b/llvm/test/CodeGen/X86/fminnum.ll @@ -645,11 +645,47 @@ define float @test_minnum_const_op2(float %x) { ret float %r } -define float @test_minnum_const_nan(float %x) { -; CHECK-LABEL: test_minnum_const_nan: -; CHECK: # %bb.0: -; CHECK-NEXT: retq - %r = call float @llvm.minnum.f32(float %x, float 0x7fff000000000000) +define float @test_minnum_const_nan(float %x, float %y) { +; SSE-LABEL: test_minnum_const_nan: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_minnum_const_nan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq + %r = call float @llvm.minnum.f32(float %y, float 0x7fff000000000000) + ret float %r +} + +; nnan minnum(Y, +inf) -> Y +define float @test_minnum_inf_nnan(float %x, float %y) nounwind { +; SSE-LABEL: test_minnum_inf_nnan: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_minnum_inf_nnan: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq + %r = call nnan float @llvm.minnum.f32(float %y, float 0x7ff0000000000000) + ret float %r +} + +; Test SNaN quieting +define float @test_minnum_snan(float %x) { +; SSE-LABEL: test_minnum_snan: +; SSE: # %bb.0: +; SSE-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; SSE-NEXT: retq +; +; AVX-LABEL: test_minnum_snan: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: retq + %r = call float @llvm.minnum.f32(float 0x7ff4000000000000, float %x) ret float %r } diff --git a/llvm/test/CodeGen/X86/pgo-profile-o0.ll b/llvm/test/CodeGen/X86/pgo-profile-o0.ll new file mode 100644 index 0000000..f9704fc --- /dev/null +++ b/llvm/test/CodeGen/X86/pgo-profile-o0.ll @@ -0,0 +1,49 @@ +; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -debug-pass=Structure %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=PASSES +; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -debug-only=branch-prob %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=BRANCH_PROB +; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -stop-after=finalize-isel %s -o - | FileCheck %s --check-prefix=MIR + +; REQUIRES: asserts + +; This test verifies that PGO profile information (branch weights) is preserved +; during instruction selection at -O0. + +; Test function with explicit branch weights from PGO. +define i32 @test_pgo_preservation(i32 %x) !prof !15 { +entry: + %cmp = icmp sgt i32 %x, 10 + ; This branch has bias: 97 taken vs 3 not taken + br i1 %cmp, label %if.then, label %if.else, !prof !16 + +if.then: + ; Hot path - should have high frequency + %add = add nsw i32 %x, 100 + br label %if.end + +if.else: + ; Cold path - should have low frequency + %sub = sub nsw i32 %x, 50 + br label %if.end + +if.end: + %result = phi i32 [ %add, %if.then ], [ %sub, %if.else ] + ret i32 %result +} + +; Profile metadata with branch weights 97:3. +!15 = !{!"function_entry_count", i64 100} +!16 = !{!"branch_weights", i32 97, i32 3} + +; Verify that Branch Probability Analysis runs at O0. +; PASSES: Branch Probability Analysis + +; Verify that the branch probabilities reflect the exact profile data. +; BRANCH_PROB: ---- Branch Probability Info : test_pgo_preservation ---- +; BRANCH_PROB: set edge entry -> 0 successor probability to {{.*}} = 97.00% +; BRANCH_PROB: set edge entry -> 1 successor probability to {{.*}} = 3.00% + +; Verify that machine IR preserves the branch probabilities from profile data +; MIR: bb.0.entry: +; MIR-NEXT: successors: %bb.{{[0-9]+}}({{0x03d70a3d|0x7c28f5c3}}), %bb.{{[0-9]+}}({{0x7c28f5c3|0x03d70a3d}}) +; The two successor probability values should be: +; - 0x7c28f5c3: approximately 97% (high probability successor) +; - 0x03d70a3d: approximately 3% (low probability successor) diff --git a/llvm/test/Transforms/AggressiveInstCombine/trunc_select.ll b/llvm/test/Transforms/AggressiveInstCombine/trunc_select.ll index fb14782..9352211 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/trunc_select.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/trunc_select.ll @@ -3,16 +3,17 @@ target datalayout = "e-m:m-p1:64:64:64-p:32:32:32-n8:16:32" -define dso_local i16 @select_i16(i16 %a, i16 %b, i1 %cond) { +define dso_local i16 @select_i16(i16 %a, i16 %b, i1 %cond) !prof !0 { ; CHECK-LABEL: @select_i16( +; CHECK: !prof [[PROF_0:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], i16 [[A:%.*]], i16 [[B:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], i16 [[A:%.*]], i16 [[B:%.*]], !prof [[PROF_1:![0-9]+]] ; CHECK-NEXT: ret i16 [[SEL]] ; entry: %conv0 = sext i16 %a to i32 %conv1 = sext i16 %b to i32 - %sel = select i1 %cond, i32 %conv0, i32 %conv1 + %sel = select i1 %cond, i32 %conv0, i32 %conv1, !prof !1 %conv4 = trunc i32 %sel to i16 ret i16 %conv4 } @@ -134,3 +135,8 @@ entry: ret i16 %conv4 } +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +; CHECK: [[PROF_0]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_1]] = !{!"branch_weights", i32 2, i32 3} + diff --git a/llvm/test/Transforms/AggressiveInstCombine/trunc_select_cmp.ll b/llvm/test/Transforms/AggressiveInstCombine/trunc_select_cmp.ll index ac9cf2d..69ad625 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/trunc_select_cmp.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/trunc_select_cmp.ll @@ -1,19 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s -define dso_local i16 @cmp_select_sext_const(i8 %a) { +define dso_local i16 @cmp_select_sext_const(i8 %a) !prof !0 { ; CHECK-LABEL: @cmp_select_sext_const( +; CHECK: !prof [[PROF_0:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[A:%.*]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], 109 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 109, i32 [[CONV]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 109, i32 [[CONV]], !prof [[PROF_1:![0-9]+]] ; CHECK-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 ; CHECK-NEXT: ret i16 [[CONV4]] ; entry: %conv = sext i8 %a to i32 %cmp = icmp slt i32 %conv, 109 - %cond = select i1 %cmp, i32 109, i32 %conv + %cond = select i1 %cmp, i32 109, i32 %conv, !prof !1 %conv4 = trunc i32 %cond to i16 ret i16 %conv4 } @@ -209,3 +210,7 @@ define i16 @cmp_select_unsigned_const_i16Const_noTransformation(i8 %a) { ret i16 %conv4 } +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +; CHECK: [[PROF_0]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_1]] = !{!"branch_weights", i32 2, i32 3} diff --git a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide-safe.ll index 4eec7ed..722693d 100644 --- a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide-safe.ll @@ -1,4 +1,8 @@ -; Testing elide performed its job for calls to coroutines marked safe. +; Coroutine calls marked with `coro_elide_safe` should be elided. +; Inside `caller`, we expect the `callee` coroutine to be elided. +; Inside `caller_conditional`, `callee` is only called on an unlikely +; path, hence we expect the `callee` coroutine NOT to be elided. +; ; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' | FileCheck %s %struct.Task = type { ptr } @@ -57,7 +61,7 @@ define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) { ; Function Attrs: presplitcoroutine define ptr @caller() #0 { entry: - %task = call ptr @callee(i8 0) #1 + %task = call ptr @callee(i8 0) coro_elide_safe ret ptr %task ; CHECK: %[[TASK:.+]] = alloca %struct.Task, align 8 ; CHECK-NEXT: %[[FRAME:.+]] = alloca [32 x i8], align 8 @@ -69,6 +73,25 @@ entry: ; CHECK-NEXT: ret ptr %[[TASK]] } +; CHECK-LABEL: define ptr @caller_conditional(i1 %cond) +; Function Attrs: presplitcoroutine +define ptr @caller_conditional(i1 %cond) #0 { +entry: + br i1 %cond, label %call, label %ret + +call: + ; CHECK-NOT: alloca + ; CHECK-NOT: @llvm.coro.id({{.*}}, ptr @callee, {{.*}}) + ; CHECK: %task = call ptr @callee(i8 0) + ; CHECK-NEXT: br label %ret + %task = call ptr @callee(i8 0) coro_elide_safe + br label %ret + +ret: + %retval = phi ptr [ %task, %call ], [ null, %entry ] + ret ptr %retval +} + declare token @llvm.coro.id(i32, ptr, ptr, ptr) declare ptr @llvm.coro.begin(token, ptr) declare ptr @llvm.coro.frame() @@ -76,4 +99,3 @@ declare ptr @llvm.coro.subfn.addr(ptr, i8) declare i1 @llvm.coro.alloc(token) attributes #0 = { presplitcoroutine } -attributes #1 = { coro_elide_safe } diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll index 663f459..de38752 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll @@ -227,10 +227,6 @@ define i32 @test3(i32 %num) { ; CHECK-NEXT: i32 1, label [[CASE1:%.*]] ; CHECK-NEXT: i32 2, label [[CASE2:%.*]] ; CHECK-NEXT: ] -; CHECK: for.body.jt4: -; CHECK-NEXT: [[COUNT_JT4:%.*]] = phi i32 [ [[INC_JT4:%.*]], [[FOR_INC_JT4:%.*]] ] -; CHECK-NEXT: [[STATE_JT4:%.*]] = phi i32 [ [[STATE_NEXT_JT4:%.*]], [[FOR_INC_JT4]] ] -; CHECK-NEXT: br label [[FOR_INC_JT1]] ; CHECK: for.body.jt3: ; CHECK-NEXT: [[COUNT_JT3:%.*]] = phi i32 [ [[INC_JT3:%.*]], [[FOR_INC_JT3:%.*]] ] ; CHECK-NEXT: [[STATE_JT3:%.*]] = phi i32 [ [[STATE_NEXT_JT3:%.*]], [[FOR_INC_JT3]] ] @@ -261,17 +257,14 @@ define i32 @test3(i32 %num) { ; CHECK: sel.2.si.unfold.false: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 4, [[SEL_2_SI_UNFOLD_TRUE_JT3]] ] ; CHECK-NEXT: br label [[SEL_3_SI_UNFOLD_FALSE]] -; CHECK: sel.2.si.unfold.false.jt4: +; CHECK: sel.2.si.unfold.false.jt3: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1_JT4:%.*]] = phi i32 [ 4, [[SEL_2_SI_UNFOLD_TRUE:%.*]] ] -; CHECK-NEXT: br label [[SEL_3_SI_UNFOLD_FALSE_JT4:%.*]] +; CHECK-NEXT: br label [[SEL_3_SI_UNFOLD_FALSE_JT3]] ; CHECK: sel.3.si.unfold.false: ; CHECK-NEXT: [[SEL_2_SI_UNFOLD_PHI:%.*]] = phi i32 [ poison, [[SEL_2_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI1]], [[SEL_2_SI_UNFOLD_FALSE]] ] ; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: sel.3.si.unfold.false.jt4: -; CHECK-NEXT: [[SEL_2_SI_UNFOLD_PHI_JT4:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI1_JT4]], [[SEL_2_SI_UNFOLD_FALSE_JT4]] ] -; CHECK-NEXT: br label [[FOR_INC_JT4]] ; CHECK: sel.3.si.unfold.false.jt3: -; CHECK-NEXT: [[SEL_2_SI_UNFOLD_PHI_JT3:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI_JT3]], [[SEL_2_SI_UNFOLD_TRUE_JT3]] ] +; CHECK-NEXT: [[SEL_2_SI_UNFOLD_PHI_JT3:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI_JT3]], [[SEL_2_SI_UNFOLD_TRUE_JT3]] ], [ [[DOTSI_UNFOLD_PHI1_JT4]], [[SEL_2_SI_UNFOLD_FALSE_JT4]] ] ; CHECK-NEXT: br label [[FOR_INC_JT3]] ; CHECK: sel.1.si.unfold.true: ; CHECK-NEXT: br i1 [[CMP_1]], label [[FOR_INC]], label [[SEL_1_SI_UNFOLD_FALSE_JT2:%.*]] @@ -289,11 +282,6 @@ define i32 @test3(i32 %num) { ; CHECK-NEXT: [[INC]] = add nsw i32 [[COUNT5]], 1 ; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]] ; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]] -; CHECK: for.inc.jt4: -; CHECK-NEXT: [[STATE_NEXT_JT4]] = phi i32 [ [[SEL_2_SI_UNFOLD_PHI_JT4]], [[SEL_3_SI_UNFOLD_FALSE_JT4]] ] -; CHECK-NEXT: [[INC_JT4]] = add nsw i32 undef, 1 -; CHECK-NEXT: [[CMP_EXIT_JT4:%.*]] = icmp slt i32 [[INC_JT4]], [[NUM]] -; CHECK-NEXT: br i1 [[CMP_EXIT_JT4]], label [[FOR_BODY_JT4:%.*]], label [[FOR_END]] ; CHECK: for.inc.jt3: ; CHECK-NEXT: [[STATE_NEXT_JT3]] = phi i32 [ [[SEL_2_SI_UNFOLD_PHI_JT3]], [[SEL_3_SI_UNFOLD_FALSE_JT3]] ] ; CHECK-NEXT: [[INC_JT3]] = add nsw i32 [[COUNT5]], 1 @@ -305,8 +293,8 @@ define i32 @test3(i32 %num) { ; CHECK-NEXT: [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]] ; CHECK-NEXT: br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]] ; CHECK: for.inc.jt1: -; CHECK-NEXT: [[COUNT4:%.*]] = phi i32 [ [[COUNT_JT4]], [[FOR_BODY_JT4]] ], [ [[COUNT_JT3]], [[FOR_BODY_JT3]] ], [ [[COUNT5]], [[SEL_1_SI_UNFOLD_TRUE_JT1]] ], [ [[COUNT]], [[FOR_BODY]] ] -; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[FOR_BODY]] ], [ 1, [[FOR_BODY_JT3]] ], [ 1, [[FOR_BODY_JT4]] ], [ [[DOTSI_UNFOLD_PHI2_JT1]], [[SEL_1_SI_UNFOLD_TRUE_JT1]] ] +; CHECK-NEXT: [[COUNT4:%.*]] = phi i32 [ [[COUNT_JT3]], [[FOR_BODY_JT3]] ], [ [[COUNT5]], [[SEL_1_SI_UNFOLD_TRUE_JT1]] ], [ [[COUNT]], [[FOR_BODY]] ] +; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[FOR_BODY]] ], [ 1, [[FOR_BODY_JT3]] ], [ [[DOTSI_UNFOLD_PHI2_JT1]], [[SEL_1_SI_UNFOLD_TRUE_JT1]] ] ; CHECK-NEXT: [[INC_JT1]] = add nsw i32 [[COUNT4]], 1 ; CHECK-NEXT: [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]] ; CHECK-NEXT: br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]] @@ -402,36 +390,28 @@ define void @pr65222(i32 %flags, i1 %cmp, i1 %tobool.not) { ; CHECK-NEXT: br label [[IF_END_JT2:%.*]] ; CHECK: cond1.si.unfold.true: ; CHECK-NEXT: br i1 [[CMP]], label [[IF_END]], label [[COND1_SI_UNFOLD_FALSE_JT1:%.*]] -; CHECK: cond1.si.unfold.true.jt3: +; CHECK: cond1.si.unfold.true.jt2: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI2:%.*]] = phi i32 [ 3, [[THEN]] ] -; CHECK-NEXT: br i1 [[CMP]], label [[IF_END_JT3:%.*]], label [[COND1_SI_UNFOLD_FALSE:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_END_JT2]], label [[COND1_SI_UNFOLD_FALSE:%.*]] ; CHECK: cond1.si.unfold.false: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI3:%.*]] = phi i32 [ 1, [[COND1_SI_UNFOLD_TRUE]] ] ; CHECK-NEXT: br label [[IF_END]] -; CHECK: cond1.si.unfold.false.jt1: +; CHECK: cond1.si.unfold.false.jt2: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI3_JT1:%.*]] = phi i32 [ 1, [[COND1_SI_UNFOLD_TRUE1:%.*]] ] -; CHECK-NEXT: br label [[IF_END_JT1:%.*]] +; CHECK-NEXT: br label [[IF_END_JT2]] ; CHECK: if.end: ; CHECK-NEXT: [[UNFOLDED:%.*]] = phi i32 [ [[FLAGS:%.*]], [[WHILE_COND]] ], [ [[COND_SI_UNFOLD_PHI]], [[TOUNFOLD_SI_UNFOLD_FALSE1]] ], [ poison, [[COND1_SI_UNFOLD_TRUE1]] ], [ [[DOTSI_UNFOLD_PHI3]], [[COND1_SI_UNFOLD_FALSE]] ] ; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ [[FLAGS]], [[WHILE_COND]] ], [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE1]] ], [ 0, [[COND1_SI_UNFOLD_TRUE1]] ], [ 0, [[COND1_SI_UNFOLD_FALSE]] ] ; CHECK-NEXT: switch i32 [[UNFOLDED]], label [[UNREACHABLE:%.*]] [ ; CHECK-NEXT: i32 0, label [[SW_BB:%.*]] ; CHECK-NEXT: ] -; CHECK: if.end.jt1: -; CHECK-NEXT: [[UNFOLDED_JT1:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI3_JT1]], [[COND1_SI_UNFOLD_FALSE_JT1]] ] -; CHECK-NEXT: [[OTHER_JT1:%.*]] = phi i32 [ 0, [[COND1_SI_UNFOLD_FALSE_JT1]] ] -; CHECK-NEXT: br label [[UNREACHABLE]] -; CHECK: if.end.jt3: -; CHECK-NEXT: [[UNFOLDED_JT3:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI2]], [[COND1_SI_UNFOLD_TRUE]] ] -; CHECK-NEXT: [[OTHER_JT3:%.*]] = phi i32 [ 0, [[COND1_SI_UNFOLD_TRUE]] ] -; CHECK-NEXT: br label [[UNREACHABLE]] ; CHECK: if.end.jt0: ; CHECK-NEXT: [[UNFOLDED_JT0:%.*]] = phi i32 [ [[COND_SI_UNFOLD_PHI_JT0]], [[TOUNFOLD_SI_UNFOLD_FALSE_JT0]] ] ; CHECK-NEXT: [[OTHER_JT0:%.*]] = phi i32 [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE_JT0]] ] ; CHECK-NEXT: br label [[SW_BB]] ; CHECK: if.end.jt2: -; CHECK-NEXT: [[UNFOLDED_JT2:%.*]] = phi i32 [ [[COND_SI_UNFOLD_PHI_JT2]], [[TOUNFOLD_SI_UNFOLD_FALSE]] ] -; CHECK-NEXT: [[OTHER_JT2:%.*]] = phi i32 [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: [[UNFOLDED_JT2:%.*]] = phi i32 [ [[COND_SI_UNFOLD_PHI_JT2]], [[TOUNFOLD_SI_UNFOLD_FALSE]] ], [ [[DOTSI_UNFOLD_PHI2]], [[COND1_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI3_JT1]], [[COND1_SI_UNFOLD_FALSE_JT1]] ] +; CHECK-NEXT: [[OTHER_JT2:%.*]] = phi i32 [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE]] ], [ 0, [[COND1_SI_UNFOLD_TRUE]] ], [ 0, [[COND1_SI_UNFOLD_FALSE_JT1]] ] ; CHECK-NEXT: br label [[UNREACHABLE]] ; CHECK: unreachable: ; CHECK-NEXT: unreachable diff --git a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll new file mode 100644 index 0000000..4555dfb --- /dev/null +++ b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll @@ -0,0 +1,281 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s + +declare void @do_something() +declare void @user(i32) + +define void @equivalent_on_default(i1 %c1) { +; CHECK-LABEL: define void @equivalent_on_default( +; CHECK-SAME: i1 [[C1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[SWITCH_BB:%.*]] +; CHECK: switch_bb: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ poison, [[CASE2END:%.*]] ] +; CHECK-NEXT: switch i32 [[PHI]], label [[DEFAULT_DEST:%.*]] [ +; CHECK-NEXT: i32 0, label [[CASE1:%.*]] +; CHECK-NEXT: i32 1, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: switch_bb.jt2: +; CHECK-NEXT: [[PHI_JT2:%.*]] = phi i32 [ [[PHI_CASE2_JT2:%.*]], [[CASE2END_JT2:%.*]] ] +; CHECK-NEXT: br label [[DEFAULT_DEST]] +; CHECK: switch_bb.jt1: +; CHECK-NEXT: [[PHI_JT1:%.*]] = phi i32 [ 1, [[CASE1]] ] +; CHECK-NEXT: br label [[CASE2]] +; CHECK: case1: +; CHECK-NEXT: br label [[SWITCH_BB_JT1:%.*]] +; CHECK: case2: +; CHECK-NEXT: br i1 [[C1]], label [[CASE2THEN:%.*]], label [[CASE2END_JT2]] +; CHECK: case2then: +; CHECK-NEXT: br label [[CASE2END_JT2]] +; CHECK: case2end: +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label [[SWITCH_BB]] +; CHECK: case2end.jt2: +; CHECK-NEXT: [[PHI_CASE2_JT2]] = phi i32 [ 2, [[CASE2]] ], [ 3, [[CASE2THEN]] ] +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label [[SWITCH_BB_JT2:%.*]] +; CHECK: default_dest: +; CHECK-NEXT: ret void +; +entry: + br label %switch_bb + +switch_bb: + %phi = phi i32 [ 0, %entry ], [ 1, %case1 ], [ %phi_case2, %case2end ] + switch i32 %phi, label %default_dest [ + i32 0, label %case1 + i32 1, label %case2 + ] + +case1: + br label %switch_bb + +case2: + br i1 %c1, label %case2then, label %case2end + +case2then: + br label %case2end + +case2end: + %phi_case2 = phi i32 [ 2, %case2 ] , [ 3, %case2then ] + call void @do_something() + br label %switch_bb + +default_dest: + ret void +} + +define void @equivalent_on_default_user(i1 %c1) { +; CHECK-LABEL: define void @equivalent_on_default_user( +; CHECK-SAME: i1 [[C1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[SWITCH_BB:%.*]] +; CHECK: switch_bb: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ poison, [[CASE2END:%.*]] ] +; CHECK-NEXT: switch i32 [[PHI]], label [[DEFAULT_DEST:%.*]] [ +; CHECK-NEXT: i32 0, label [[CASE1:%.*]] +; CHECK-NEXT: i32 1, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: switch_bb.jt2: +; CHECK-NEXT: [[PHI_JT2:%.*]] = phi i32 [ [[PHI_CASE2_JT2:%.*]], [[CASE2END_JT2:%.*]] ] +; CHECK-NEXT: br label [[DEFAULT_DEST]] +; CHECK: switch_bb.jt1: +; CHECK-NEXT: [[PHI_JT1:%.*]] = phi i32 [ 1, [[CASE1]] ] +; CHECK-NEXT: br label [[CASE2]] +; CHECK: case1: +; CHECK-NEXT: br label [[SWITCH_BB_JT1:%.*]] +; CHECK: case2: +; CHECK-NEXT: br i1 [[C1]], label [[CASE2THEN:%.*]], label [[CASE2END_JT2]] +; CHECK: case2then: +; CHECK-NEXT: br label [[CASE2END_JT2]] +; CHECK: case2end: +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: call void @user(i32 poison) +; CHECK-NEXT: br label [[SWITCH_BB]] +; CHECK: case2end.jt2: +; CHECK-NEXT: [[PHI_CASE2_JT2]] = phi i32 [ 2, [[CASE2]] ], [ 3, [[CASE2THEN]] ] +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: call void @user(i32 [[PHI_CASE2_JT2]]) +; CHECK-NEXT: br label [[SWITCH_BB_JT2:%.*]] +; CHECK: default_dest: +; CHECK-NEXT: ret void +; +entry: + br label %switch_bb + +switch_bb: + %phi = phi i32 [ 0, %entry ], [ 1, %case1 ], [ %phi_case2, %case2end ] + switch i32 %phi, label %default_dest [ + i32 0, label %case1 + i32 1, label %case2 + ] + +case1: + br label %switch_bb + +case2: + br i1 %c1, label %case2then, label %case2end + +case2then: + br label %case2end + +case2end: + %phi_case2 = phi i32 [ 2, %case2 ] , [ 3, %case2then ] + call void @do_something() + call void @user(i32 %phi_case2) + br label %switch_bb + +default_dest: + ret void +} + +define void @equivalent_only_cases(i1 %c1) { +; CHECK-LABEL: define void @equivalent_only_cases( +; CHECK-SAME: i1 [[C1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[SWITCH_BB:%.*]] +; CHECK: switch_bb: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ poison, [[CASE2END:%.*]] ] +; CHECK-NEXT: switch i32 [[PHI]], label [[DEFAULT_DEST:%.*]] [ +; CHECK-NEXT: i32 0, label [[CASE1:%.*]] +; CHECK-NEXT: i32 1, label [[CASE2:%.*]] +; CHECK-NEXT: i32 2, label [[CASE1]] +; CHECK-NEXT: i32 3, label [[CASE1]] +; CHECK-NEXT: ] +; CHECK: switch_bb.jt2: +; CHECK-NEXT: [[PHI_JT2:%.*]] = phi i32 [ [[PHI_CASE2_JT2:%.*]], [[CASE2END_JT2:%.*]] ] +; CHECK-NEXT: br label [[CASE1]] +; CHECK: switch_bb.jt1: +; CHECK-NEXT: [[PHI_JT1:%.*]] = phi i32 [ 1, [[CASE1]] ] +; CHECK-NEXT: br label [[CASE2]] +; CHECK: case1: +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label [[SWITCH_BB_JT1:%.*]] +; CHECK: case2: +; CHECK-NEXT: br i1 [[C1]], label [[CASE2THEN:%.*]], label [[CASE2END_JT2]] +; CHECK: case2then: +; CHECK-NEXT: br label [[CASE2END_JT2]] +; CHECK: case2end: +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label [[SWITCH_BB]] +; CHECK: case2end.jt2: +; CHECK-NEXT: [[PHI_CASE2_JT2]] = phi i32 [ 2, [[CASE2]] ], [ 3, [[CASE2THEN]] ] +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label [[SWITCH_BB_JT2:%.*]] +; CHECK: default_dest: +; CHECK-NEXT: ret void +; +entry: + br label %switch_bb + +switch_bb: + %phi = phi i32 [ 0, %entry ], [ 1, %case1 ], [ %phi_case2, %case2end ] + switch i32 %phi, label %default_dest [ + i32 0, label %case1 + i32 1, label %case2 + i32 2, label %case1 + i32 3, label %case1 + ] + +case1: + call void @do_something() + br label %switch_bb + +case2: + br i1 %c1, label %case2then, label %case2end + +case2then: + br label %case2end + +case2end: + %phi_case2 = phi i32 [ 2, %case2 ] , [ 3, %case2then ] + call void @do_something() + br label %switch_bb + +default_dest: + ret void +} + +define void @equivalent_both_case_and_default(i1 %c1, i1 %c2) { +; CHECK-LABEL: define void @equivalent_both_case_and_default( +; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[SWITCH_BB:%.*]] +; CHECK: switch_bb: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ poison, [[CASE2END:%.*]] ] +; CHECK-NEXT: switch i32 [[PHI]], label [[DEFAULT_DEST:%.*]] [ +; CHECK-NEXT: i32 0, label [[CASE1:%.*]] +; CHECK-NEXT: i32 1, label [[CASE2:%.*]] +; CHECK-NEXT: i32 2, label [[CASE1]] +; CHECK-NEXT: i32 3, label [[CASE1]] +; CHECK-NEXT: ] +; CHECK: switch_bb.jt4: +; CHECK-NEXT: [[PHI_JT3:%.*]] = phi i32 [ [[PHI_CASE2_JT3:%.*]], [[CASE2END_JT3:%.*]] ] +; CHECK-NEXT: br label [[DEFAULT_DEST]] +; CHECK: switch_bb.jt2: +; CHECK-NEXT: [[PHI_JT2:%.*]] = phi i32 [ [[PHI_CASE2_JT2:%.*]], [[CASE2END_JT2:%.*]] ] +; CHECK-NEXT: br label [[CASE1]] +; CHECK: switch_bb.jt1: +; CHECK-NEXT: [[PHI_JT1:%.*]] = phi i32 [ 1, [[CASE1]] ] +; CHECK-NEXT: br label [[CASE2]] +; CHECK: case1: +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label [[SWITCH_BB_JT1:%.*]] +; CHECK: case2: +; CHECK-NEXT: br i1 [[C1]], label [[CASE2THEN:%.*]], label [[CASE2END_JT2]] +; CHECK: case2then: +; CHECK-NEXT: br i1 [[C2]], label [[CASE2THEN2:%.*]], label [[CASE2END_JT2]] +; CHECK: case2then2: +; CHECK-NEXT: br i1 [[C2]], label [[CASE2THEN3:%.*]], label [[CASE2END_JT3]] +; CHECK: case2then3: +; CHECK-NEXT: br label [[CASE2END_JT3]] +; CHECK: case2end: +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label [[SWITCH_BB]] +; CHECK: case2end.jt4: +; CHECK-NEXT: [[PHI_CASE2_JT3]] = phi i32 [ 4, [[CASE2THEN2]] ], [ 5, [[CASE2THEN3]] ] +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label [[SWITCH_BB_JT3:%.*]] +; CHECK: case2end.jt2: +; CHECK-NEXT: [[PHI_CASE2_JT2]] = phi i32 [ 2, [[CASE2]] ], [ 3, [[CASE2THEN]] ] +; CHECK-NEXT: call void @do_something() +; CHECK-NEXT: br label [[SWITCH_BB_JT2:%.*]] +; CHECK: default_dest: +; CHECK-NEXT: ret void +; +entry: + br label %switch_bb + +switch_bb: + %phi = phi i32 [ 0, %entry ], [ 1, %case1 ], [ %phi_case2, %case2end ] + switch i32 %phi, label %default_dest [ + i32 0, label %case1 + i32 1, label %case2 + i32 2, label %case1 + i32 3, label %case1 + ] + +case1: + call void @do_something() + br label %switch_bb + +case2: + br i1 %c1, label %case2then, label %case2end + +case2then: + br i1 %c2, label %case2then2, label %case2end + +case2then2: + br i1 %c2, label %case2then3, label %case2end + +case2then3: + br label %case2end + +case2end: + %phi_case2 = phi i32 [ 2, %case2 ], [ 3, %case2then ], [ 4, %case2then2 ], [ 5, %case2then3 ] + call void @do_something() + br label %switch_bb + +default_dest: + ret void +} diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index ebea5bf..d88eaf8 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -1,8 +1,11 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes=instcombine -S | FileCheck %s declare i1 @gen1() +;. +; CHECK: @glb = global i8 0 +;. define i1 @cond_eq_and(i8 %X, i8 %Y, i8 noundef %C) { ; CHECK-LABEL: @cond_eq_and( ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[X:%.*]], [[C:%.*]] @@ -16,16 +19,16 @@ define i1 @cond_eq_and(i8 %X, i8 %Y, i8 noundef %C) { ret i1 %res } -define i1 @cond_eq_and_const(i8 %X, i8 %Y) { +define i1 @cond_eq_and_const(i8 %X, i8 %Y) !prof !0 { ; CHECK-LABEL: @cond_eq_and_const( ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[X:%.*]], 10 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[Y:%.*]], 10 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[COND]], i1 [[TMP1]], i1 false +; CHECK-NEXT: [[RES:%.*]] = select i1 [[COND]], i1 [[TMP1]], i1 false, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i1 [[RES]] ; %cond = icmp eq i8 %X, 10 %lhs = icmp ult i8 %X, %Y - %res = select i1 %cond, i1 %lhs, i1 false + %res = select i1 %cond, i1 %lhs, i1 false, !prof !1 ret i1 %res } @@ -42,16 +45,16 @@ define i1 @cond_eq_or(i8 %X, i8 %Y, i8 noundef %C) { ret i1 %res } -define i1 @cond_eq_or_const(i8 %X, i8 %Y) { +define i1 @cond_eq_or_const(i8 %X, i8 %Y) !prof !0 { ; CHECK-LABEL: @cond_eq_or_const( ; CHECK-NEXT: [[COND:%.*]] = icmp ne i8 [[X:%.*]], 10 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[Y:%.*]], 10 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[COND]], i1 true, i1 [[TMP1]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[COND]], i1 true, i1 [[TMP1]], !prof [[PROF1]] ; CHECK-NEXT: ret i1 [[RES]] ; %cond = icmp ne i8 %X, 10 %lhs = icmp ult i8 %X, %Y - %res = select i1 %cond, i1 true, i1 %lhs + %res = select i1 %cond, i1 true, i1 %lhs, !prof !1 ret i1 %res } @@ -793,3 +796,10 @@ define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) { %or = select <2 x i1> %and, <2 x i1> <i1 true, i1 true>, <2 x i1> %implied ret <2 x i1> %or } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index c225ede5..65058bd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -621,8 +621,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias % ; I32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8 ; I32-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] ; I32-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]] -; I32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[START]], i64 0 -; I32-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; I32-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x ptr> poison, ptr [[SRC_2]], i64 0 ; I32-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT1]], <8 x ptr> poison, <8 x i32> zeroinitializer ; I32-NEXT: br label %[[VECTOR_BODY:.*]] @@ -644,14 +642,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias % ; I32-NEXT: [[TMP16:%.*]] = add i64 [[TMP8]], 1 ; I32-NEXT: [[TMP17:%.*]] = add i64 [[TMP9]], 1 ; I32-NEXT: [[TMP18:%.*]] = add i64 [[TMP10]], 1 -; I32-NEXT: [[TMP19:%.*]] = insertelement <8 x i64> poison, i64 [[TMP11]], i32 0 -; I32-NEXT: [[TMP20:%.*]] = insertelement <8 x i64> [[TMP19]], i64 [[TMP12]], i32 1 -; I32-NEXT: [[TMP21:%.*]] = insertelement <8 x i64> [[TMP20]], i64 [[TMP13]], i32 2 -; I32-NEXT: [[TMP22:%.*]] = insertelement <8 x i64> [[TMP21]], i64 [[TMP14]], i32 3 -; I32-NEXT: [[TMP23:%.*]] = insertelement <8 x i64> [[TMP22]], i64 [[TMP15]], i32 4 -; I32-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 5 -; I32-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 6 -; I32-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 7 ; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP11]] ; I32-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]] ; I32-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] @@ -677,22 +667,21 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias % ; I32-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP41]], i32 6 ; I32-NEXT: [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[TMP42]], i32 7 ; I32-NEXT: [[TMP51:%.*]] = fcmp oeq <8 x float> [[TMP50]], zeroinitializer -; I32-NEXT: [[TMP52:%.*]] = mul <8 x i64> [[TMP26]], [[BROADCAST_SPLAT]] -; I32-NEXT: [[TMP53:%.*]] = extractelement <8 x i64> [[TMP52]], i32 0 +; I32-NEXT: [[TMP53:%.*]] = mul i64 [[TMP11]], [[START]] +; I32-NEXT: [[TMP55:%.*]] = mul i64 [[TMP12]], [[START]] +; I32-NEXT: [[TMP57:%.*]] = mul i64 [[TMP13]], [[START]] +; I32-NEXT: [[TMP59:%.*]] = mul i64 [[TMP14]], [[START]] +; I32-NEXT: [[TMP61:%.*]] = mul i64 [[TMP15]], [[START]] +; I32-NEXT: [[TMP63:%.*]] = mul i64 [[TMP16]], [[START]] +; I32-NEXT: [[TMP65:%.*]] = mul i64 [[TMP17]], [[START]] +; I32-NEXT: [[TMP67:%.*]] = mul i64 [[TMP18]], [[START]] ; I32-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP53]] -; I32-NEXT: [[TMP55:%.*]] = extractelement <8 x i64> [[TMP52]], i32 1 ; I32-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP55]] -; I32-NEXT: [[TMP57:%.*]] = extractelement <8 x i64> [[TMP52]], i32 2 ; I32-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP57]] -; I32-NEXT: [[TMP59:%.*]] = extractelement <8 x i64> [[TMP52]], i32 3 ; I32-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP59]] -; I32-NEXT: [[TMP61:%.*]] = extractelement <8 x i64> [[TMP52]], i32 4 ; I32-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP61]] -; I32-NEXT: [[TMP63:%.*]] = extractelement <8 x i64> [[TMP52]], i32 5 ; I32-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP63]] -; I32-NEXT: [[TMP65:%.*]] = extractelement <8 x i64> [[TMP52]], i32 6 ; I32-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP65]] -; I32-NEXT: [[TMP67:%.*]] = extractelement <8 x i64> [[TMP52]], i32 7 ; I32-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP67]] ; I32-NEXT: [[TMP69:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP54]], i32 0 ; I32-NEXT: [[TMP70:%.*]] = insertelement <8 x ptr> [[TMP69]], ptr [[TMP56]], i32 1 @@ -774,7 +763,222 @@ exit: ret void } -attributes #0 = { "target-cpu"="znver3" } +define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %src.1, i32 %x) #0 { +; I64-LABEL: define void @address_use_in_different_block( +; I64-SAME: ptr noalias [[DST:%.*]], ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; I64-NEXT: [[ENTRY:.*:]] +; I64-NEXT: [[X_POS:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0) +; I64-NEXT: [[OFFSET:%.*]] = zext i32 [[X_POS]] to i64 +; I64-NEXT: br label %[[VECTOR_PH:.*]] +; I64: [[VECTOR_PH]]: +; I64-NEXT: br label %[[VECTOR_BODY:.*]] +; I64: [[VECTOR_BODY]]: +; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; I64-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; I64-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; I64-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; I64-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; I64-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], [[OFFSET]] +; I64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP1]], [[OFFSET]] +; I64-NEXT: [[TMP10:%.*]] = mul i64 [[TMP2]], [[OFFSET]] +; I64-NEXT: [[TMP11:%.*]] = mul i64 [[TMP3]], [[OFFSET]] +; I64-NEXT: [[TMP12:%.*]] = mul i64 [[TMP4]], [[OFFSET]] +; I64-NEXT: [[TMP13:%.*]] = mul i64 [[TMP5]], [[OFFSET]] +; I64-NEXT: [[TMP14:%.*]] = mul i64 [[TMP6]], [[OFFSET]] +; I64-NEXT: [[TMP15:%.*]] = mul i64 [[TMP7]], [[OFFSET]] +; I64-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP8]] +; I64-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP9]] +; I64-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP10]] +; I64-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP11]] +; I64-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP12]] +; I64-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP13]] +; I64-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP14]] +; I64-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP15]] +; I64-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP16]], align 4 +; I64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP17]], align 4 +; I64-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP18]], align 4 +; I64-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP19]], align 4 +; I64-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP20]], align 4 +; I64-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP21]], align 4 +; I64-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP22]], align 4 +; I64-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP23]], align 4 +; I64-NEXT: [[TMP32:%.*]] = sext i32 [[TMP24]] to i64 +; I64-NEXT: [[TMP33:%.*]] = sext i32 [[TMP25]] to i64 +; I64-NEXT: [[TMP34:%.*]] = sext i32 [[TMP26]] to i64 +; I64-NEXT: [[TMP35:%.*]] = sext i32 [[TMP27]] to i64 +; I64-NEXT: [[TMP36:%.*]] = sext i32 [[TMP28]] to i64 +; I64-NEXT: [[TMP37:%.*]] = sext i32 [[TMP29]] to i64 +; I64-NEXT: [[TMP38:%.*]] = sext i32 [[TMP30]] to i64 +; I64-NEXT: [[TMP39:%.*]] = sext i32 [[TMP31]] to i64 +; I64-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP32]] +; I64-NEXT: [[TMP41:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP33]] +; I64-NEXT: [[TMP42:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP34]] +; I64-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP35]] +; I64-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP36]] +; I64-NEXT: [[TMP45:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP37]] +; I64-NEXT: [[TMP46:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP38]] +; I64-NEXT: [[TMP47:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP39]] +; I64-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 -8 +; I64-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[TMP41]], i64 -8 +; I64-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[TMP42]], i64 -8 +; I64-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[TMP43]], i64 -8 +; I64-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[TMP44]], i64 -8 +; I64-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[TMP45]], i64 -8 +; I64-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[TMP46]], i64 -8 +; I64-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP47]], i64 -8 +; I64-NEXT: [[TMP56:%.*]] = load double, ptr [[TMP48]], align 8 +; I64-NEXT: [[TMP57:%.*]] = load double, ptr [[TMP49]], align 8 +; I64-NEXT: [[TMP58:%.*]] = insertelement <2 x double> poison, double [[TMP56]], i32 0 +; I64-NEXT: [[TMP59:%.*]] = insertelement <2 x double> [[TMP58]], double [[TMP57]], i32 1 +; I64-NEXT: [[TMP60:%.*]] = load double, ptr [[TMP50]], align 8 +; I64-NEXT: [[TMP61:%.*]] = load double, ptr [[TMP51]], align 8 +; I64-NEXT: [[TMP62:%.*]] = insertelement <2 x double> poison, double [[TMP60]], i32 0 +; I64-NEXT: [[TMP63:%.*]] = insertelement <2 x double> [[TMP62]], double [[TMP61]], i32 1 +; I64-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP52]], align 8 +; I64-NEXT: [[TMP65:%.*]] = load double, ptr [[TMP53]], align 8 +; I64-NEXT: [[TMP66:%.*]] = insertelement <2 x double> poison, double [[TMP64]], i32 0 +; I64-NEXT: [[TMP67:%.*]] = insertelement <2 x double> [[TMP66]], double [[TMP65]], i32 1 +; I64-NEXT: [[TMP68:%.*]] = load double, ptr [[TMP54]], align 8 +; I64-NEXT: [[TMP69:%.*]] = load double, ptr [[TMP55]], align 8 +; I64-NEXT: [[TMP70:%.*]] = insertelement <2 x double> poison, double [[TMP68]], i32 0 +; I64-NEXT: [[TMP71:%.*]] = insertelement <2 x double> [[TMP70]], double [[TMP69]], i32 1 +; I64-NEXT: [[TMP72:%.*]] = fsub <2 x double> zeroinitializer, [[TMP59]] +; I64-NEXT: [[TMP73:%.*]] = fsub <2 x double> zeroinitializer, [[TMP63]] +; I64-NEXT: [[TMP74:%.*]] = fsub <2 x double> zeroinitializer, [[TMP67]] +; I64-NEXT: [[TMP75:%.*]] = fsub <2 x double> zeroinitializer, [[TMP71]] +; I64-NEXT: [[TMP76:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP8]] +; I64-NEXT: [[TMP77:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP9]] +; I64-NEXT: [[TMP78:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP10]] +; I64-NEXT: [[TMP79:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP11]] +; I64-NEXT: [[TMP80:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP12]] +; I64-NEXT: [[TMP81:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP13]] +; I64-NEXT: [[TMP82:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP14]] +; I64-NEXT: [[TMP83:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP15]] +; I64-NEXT: [[TMP84:%.*]] = extractelement <2 x double> [[TMP72]], i32 0 +; I64-NEXT: store double [[TMP84]], ptr [[TMP76]], align 8 +; I64-NEXT: [[TMP85:%.*]] = extractelement <2 x double> [[TMP72]], i32 1 +; I64-NEXT: store double [[TMP85]], ptr [[TMP77]], align 8 +; I64-NEXT: [[TMP86:%.*]] = extractelement <2 x double> [[TMP73]], i32 0 +; I64-NEXT: store double [[TMP86]], ptr [[TMP78]], align 8 +; I64-NEXT: [[TMP87:%.*]] = extractelement <2 x double> [[TMP73]], i32 1 +; I64-NEXT: store double [[TMP87]], ptr [[TMP79]], align 8 +; I64-NEXT: [[TMP88:%.*]] = extractelement <2 x double> [[TMP74]], i32 0 +; I64-NEXT: store double [[TMP88]], ptr [[TMP80]], align 8 +; I64-NEXT: [[TMP89:%.*]] = extractelement <2 x double> [[TMP74]], i32 1 +; I64-NEXT: store double [[TMP89]], ptr [[TMP81]], align 8 +; I64-NEXT: [[TMP90:%.*]] = extractelement <2 x double> [[TMP75]], i32 0 +; I64-NEXT: store double [[TMP90]], ptr [[TMP82]], align 8 +; I64-NEXT: [[TMP91:%.*]] = extractelement <2 x double> [[TMP75]], i32 1 +; I64-NEXT: store double [[TMP91]], ptr [[TMP83]], align 8 +; I64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; I64-NEXT: [[TMP92:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 +; I64-NEXT: br i1 [[TMP92]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; I64: [[MIDDLE_BLOCK]]: +; I64-NEXT: br label %[[SCALAR_PH:.*]] +; I64: [[SCALAR_PH]]: +; +; I32-LABEL: define void @address_use_in_different_block( +; I32-SAME: ptr noalias [[DST:%.*]], ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; I32-NEXT: [[ENTRY:.*:]] +; I32-NEXT: [[X_POS:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0) +; I32-NEXT: [[OFFSET:%.*]] = zext i32 [[X_POS]] to i64 +; I32-NEXT: br label %[[VECTOR_PH:.*]] +; I32: [[VECTOR_PH]]: +; I32-NEXT: br label %[[VECTOR_BODY:.*]] +; I32: [[VECTOR_BODY]]: +; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; I32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP0]], [[OFFSET]] +; I32-NEXT: [[TMP5:%.*]] = mul i64 [[TMP1]], [[OFFSET]] +; I32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], [[OFFSET]] +; I32-NEXT: [[TMP7:%.*]] = mul i64 [[TMP3]], [[OFFSET]] +; I32-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP4]] +; I32-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP5]] +; I32-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP6]] +; I32-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP7]] +; I32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4 +; I32-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4 +; I32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 4 +; I32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 +; I32-NEXT: [[TMP16:%.*]] = sext i32 [[TMP12]] to i64 +; I32-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64 +; I32-NEXT: [[TMP18:%.*]] = sext i32 [[TMP14]] to i64 +; I32-NEXT: [[TMP19:%.*]] = sext i32 [[TMP15]] to i64 +; I32-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP16]] +; I32-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP17]] +; I32-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP18]] +; I32-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP19]] +; I32-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP20]], i64 -8 +; I32-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 -8 +; I32-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP22]], i64 -8 +; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP23]], i64 -8 +; I32-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP24]], align 8 +; I32-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP25]], align 8 +; I32-NEXT: [[TMP30:%.*]] = load double, ptr [[TMP26]], align 8 +; I32-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP27]], align 8 +; I32-NEXT: [[TMP32:%.*]] = insertelement <4 x double> poison, double [[TMP28]], i32 0 +; I32-NEXT: [[TMP33:%.*]] = insertelement <4 x double> [[TMP32]], double [[TMP29]], i32 1 +; I32-NEXT: [[TMP34:%.*]] = insertelement <4 x double> [[TMP33]], double [[TMP30]], i32 2 +; I32-NEXT: [[TMP35:%.*]] = insertelement <4 x double> [[TMP34]], double [[TMP31]], i32 3 +; I32-NEXT: [[TMP36:%.*]] = fsub <4 x double> zeroinitializer, [[TMP35]] +; I32-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP4]] +; I32-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP5]] +; I32-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP6]] +; I32-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP7]] +; I32-NEXT: [[TMP41:%.*]] = extractelement <4 x double> [[TMP36]], i32 0 +; I32-NEXT: store double [[TMP41]], ptr [[TMP37]], align 8 +; I32-NEXT: [[TMP42:%.*]] = extractelement <4 x double> [[TMP36]], i32 1 +; I32-NEXT: store double [[TMP42]], ptr [[TMP38]], align 8 +; I32-NEXT: [[TMP43:%.*]] = extractelement <4 x double> [[TMP36]], i32 2 +; I32-NEXT: store double [[TMP43]], ptr [[TMP39]], align 8 +; I32-NEXT: [[TMP44:%.*]] = extractelement <4 x double> [[TMP36]], i32 3 +; I32-NEXT: store double [[TMP44]], ptr [[TMP40]], align 8 +; I32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; I32-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; I32-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; I32: [[MIDDLE_BLOCK]]: +; I32-NEXT: br label %[[SCALAR_PH:.*]] +; I32: [[SCALAR_PH]]: +; +entry: + %x.pos = call i32 @llvm.smax.i32(i32 %x, i32 0) + %offset = zext i32 %x.pos to i64 + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %7 = mul i64 %iv, %offset + %gep.src.0 = getelementptr i32, ptr %src.0, i64 %7 + %l8 = load i32, ptr %gep.src.0, align 4 + %c = icmp sgt i32 %x, 0 + br i1 %c, label %loop.latch, label %then + +then: + br label %loop.latch + +loop.latch: + %l.ext = sext i32 %l8 to i64 + %gep.src.1 = getelementptr double, ptr %src.1, i64 %l.ext + %13 = getelementptr i8, ptr %gep.src.1, i64 -8 + %l.2 = load double, ptr %13, align 8 + %sub = fsub double 0.000000e+00, %l.2 + %gep.dst = getelementptr double, ptr %dst, i64 %7 + store double %sub, ptr %gep.dst, align 8 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 100 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + attributes #0 = { "target-cpu"="znver2" } !0 = distinct !{!0, !1} diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index 774f0db..f293ed1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -186,12 +186,11 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE4:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE4]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE4]] ] -; CHECK-NEXT: [[TMP0:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 777) ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; CHECK: pred.udiv.if: -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 777 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = udiv i32 [[TMP4]], [[X]] @@ -201,7 +200,8 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4]] ; CHECK: pred.udiv.if3: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP7]], 777 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], [[X]] @@ -212,7 +212,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll index 0fc3c19..a43e762 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll @@ -401,4 +401,27 @@ b: ret i32 %1 } +define i32 @else_will_be_unreachable(i1 %arg) { +; CHECK-LABEL: @else_will_be_unreachable( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I:%.*]] = select i1 [[ARG:%.*]], i32 0, i32 1 +; CHECK-NEXT: ret i32 [[I]] +; +entry: + switch i1 %arg, label %else [ + i1 false, label %if + i1 true, label %if + ] + +if: + br i1 %arg, label %else, label %bb + +bb: + br label %else + +else: + %i = phi i32 [ 0, %entry ], [ 0, %if ], [ 1, %bb ] + ret i32 %i +} + declare void @bar(ptr nonnull dereferenceable(4)) diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveX390/vector-fp.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveX390/vector-fp.s index e1e9b57..64e3ed9 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveX390/vector-fp.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveX390/vector-fp.s @@ -2323,13 +2323,13 @@ vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_F_F_W vfncvt.f.f.w v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_ROD_F_F_W vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 3 1.00 U 1 VLEN1024X300SiFive7PipeA,VLEN1024X300SiFive7PipeAB VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv v8, v16, v24 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf v8, v16, fs0 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv v8, v16, v24 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf v8, v16, fs0 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf v8, v16, fs0 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv v8, v16, v24 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf v8, v16, fs0 +# CHECK-NEXT: 1 23 16.00 23 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv v8, v16, v24 +# CHECK-NEXT: 1 23 16.00 23 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf v8, v16, fs0 +# CHECK-NEXT: 1 23 16.00 23 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv v8, v16, v24 +# CHECK-NEXT: 1 23 16.00 23 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf v8, v16, fs0 +# CHECK-NEXT: 1 23 16.00 23 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf v8, v16, fs0 +# CHECK-NEXT: 1 23 16.00 23 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv v8, v16, v24 +# CHECK-NEXT: 1 23 16.00 23 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf v8, v16, fs0 # CHECK-NEXT: 1 228 228.00 228 VLEN1024X300SiFive7VA1[1,229],VLEN1024X300SiFive7VA1OrVA2[1,229],VLEN1024X300SiFive7VCQ VFDIV_VV vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 228 228.00 228 VLEN1024X300SiFive7VA1[1,229],VLEN1024X300SiFive7VA1OrVA2[1,229],VLEN1024X300SiFive7VCQ VFDIV_VF vfdiv.vf v8, v16, fs0 # CHECK-NEXT: 1 228 228.00 228 VLEN1024X300SiFive7VA1[1,229],VLEN1024X300SiFive7VA1OrVA2[1,229],VLEN1024X300SiFive7VCQ VFRDIV_VF vfrdiv.vf v8, v16, fs0 @@ -2352,22 +2352,22 @@ vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 228 228.00 228 VLEN1024X300SiFive7VA1[1,229],VLEN1024X300SiFive7VA1OrVA2[1,229],VLEN1024X300SiFive7VCQ VFSQRT_V vfsqrt.v v8, v24 # CHECK-NEXT: 1 8 2.00 8 VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFRSQRT7_V vfrsqrt7.v v8, v24 # CHECK-NEXT: 1 8 2.00 8 VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFREC7_V vfrec7.v v8, v24 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv v8, v16, v24 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf v8, v16, fs0 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv v8, v16, v24 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf v8, v16, fs0 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv v8, v16, v24 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf v8, v16, fs0 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv v8, v16, v24 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf v8, v16, fs0 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv v8, v16, v24 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf v8, v16, fs0 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv v8, v16, v24 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf v8, v16, fs0 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv v8, v16, v24 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf v8, v16, fs0 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv v8, v16, v24 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf v8, v16, fs0 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv v8, v16, v24 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf v8, v16, fs0 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv v8, v16, v24 +# CHECK-NEXT: 1 19 16.00 19 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf v8, v16, fs0 # CHECK-NEXT: 1 8 2.00 8 VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFCVT_XU_F_V vfcvt.xu.f.v v8, v16 # CHECK-NEXT: 1 8 2.00 8 VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFCVT_X_F_V vfcvt.x.f.v v8, v16 # CHECK-NEXT: 1 8 2.00 8 VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFCVT_RTZ_XU_F_V vfcvt.rtz.xu.f.v v8, v16 # CHECK-NEXT: 1 8 2.00 8 VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFCVT_RTZ_X_F_V vfcvt.rtz.x.f.v v8, v16 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v v8, v16 -# CHECK-NEXT: 1 16 16.00 16 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v v8, v16 +# CHECK-NEXT: 1 23 16.00 23 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v v8, v16 +# CHECK-NEXT: 1 23 16.00 23 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v v8, v16 # CHECK-NEXT: 1 8 2.00 8 VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFWCVT_XU_F_V vfwcvt.xu.f.v v8, v16 # CHECK-NEXT: 1 8 2.00 8 VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFWCVT_X_F_V vfwcvt.x.f.v v8, v16 # CHECK-NEXT: 1 8 2.00 8 VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFWCVT_RTZ_XU_F_V vfwcvt.rtz.xu.f.v v8, v16 @@ -2384,13 +2384,13 @@ vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_F_F_W vfncvt.f.f.w v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_ROD_F_F_W vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 3 1.00 U 1 VLEN1024X300SiFive7PipeA,VLEN1024X300SiFive7PipeAB VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv v8, v16, v24 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf v8, v16, fs0 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv v8, v16, v24 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf v8, v16, fs0 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf v8, v16, fs0 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv v8, v16, v24 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf v8, v16, fs0 +# CHECK-NEXT: 1 39 32.00 39 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv v8, v16, v24 +# CHECK-NEXT: 1 39 32.00 39 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf v8, v16, fs0 +# CHECK-NEXT: 1 39 32.00 39 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv v8, v16, v24 +# CHECK-NEXT: 1 39 32.00 39 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf v8, v16, fs0 +# CHECK-NEXT: 1 39 32.00 39 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf v8, v16, fs0 +# CHECK-NEXT: 1 39 32.00 39 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv v8, v16, v24 +# CHECK-NEXT: 1 39 32.00 39 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf v8, v16, fs0 # CHECK-NEXT: 1 456 456.00 456 VLEN1024X300SiFive7VA1[1,457],VLEN1024X300SiFive7VA1OrVA2[1,457],VLEN1024X300SiFive7VCQ VFDIV_VV vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 456 456.00 456 VLEN1024X300SiFive7VA1[1,457],VLEN1024X300SiFive7VA1OrVA2[1,457],VLEN1024X300SiFive7VCQ VFDIV_VF vfdiv.vf v8, v16, fs0 # CHECK-NEXT: 1 456 456.00 456 VLEN1024X300SiFive7VA1[1,457],VLEN1024X300SiFive7VA1OrVA2[1,457],VLEN1024X300SiFive7VCQ VFRDIV_VF vfrdiv.vf v8, v16, fs0 @@ -2413,22 +2413,22 @@ vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 456 456.00 456 VLEN1024X300SiFive7VA1[1,457],VLEN1024X300SiFive7VA1OrVA2[1,457],VLEN1024X300SiFive7VCQ VFSQRT_V vfsqrt.v v8, v24 # CHECK-NEXT: 1 8 4.00 8 VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFRSQRT7_V vfrsqrt7.v v8, v24 # CHECK-NEXT: 1 8 4.00 8 VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFREC7_V vfrec7.v v8, v24 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv v8, v16, v24 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf v8, v16, fs0 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv v8, v16, v24 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf v8, v16, fs0 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv v8, v16, v24 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf v8, v16, fs0 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv v8, v16, v24 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf v8, v16, fs0 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv v8, v16, v24 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf v8, v16, fs0 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv v8, v16, v24 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf v8, v16, fs0 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv v8, v16, v24 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf v8, v16, fs0 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv v8, v16, v24 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf v8, v16, fs0 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv v8, v16, v24 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf v8, v16, fs0 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv v8, v16, v24 +# CHECK-NEXT: 1 35 32.00 35 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf v8, v16, fs0 # CHECK-NEXT: 1 8 4.00 8 VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFCVT_XU_F_V vfcvt.xu.f.v v8, v16 # CHECK-NEXT: 1 8 4.00 8 VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFCVT_X_F_V vfcvt.x.f.v v8, v16 # CHECK-NEXT: 1 8 4.00 8 VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFCVT_RTZ_XU_F_V vfcvt.rtz.xu.f.v v8, v16 # CHECK-NEXT: 1 8 4.00 8 VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFCVT_RTZ_X_F_V vfcvt.rtz.x.f.v v8, v16 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v v8, v16 -# CHECK-NEXT: 1 32 32.00 32 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v v8, v16 +# CHECK-NEXT: 1 39 32.00 39 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v v8, v16 +# CHECK-NEXT: 1 39 32.00 39 VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v v8, v16 # CHECK-NEXT: 1 8 4.00 8 VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFWCVT_XU_F_V vfwcvt.xu.f.v v8, v16 # CHECK-NEXT: 1 8 4.00 8 VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFWCVT_X_F_V vfwcvt.x.f.v v8, v16 # CHECK-NEXT: 1 8 4.00 8 VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFWCVT_RTZ_XU_F_V vfwcvt.rtz.xu.f.v v8, v16 @@ -2445,13 +2445,13 @@ vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_F_F_W vfncvt.f.f.w v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_ROD_F_F_W vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 3 1.00 U 1 VLEN1024X300SiFive7PipeA,VLEN1024X300SiFive7PipeAB VSETVLI vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv v8, v16, v24 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf v8, v16, fs0 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv v8, v16, v24 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf v8, v16, fs0 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf v8, v16, fs0 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv v8, v16, v24 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf v8, v16, fs0 +# CHECK-NEXT: 1 71 64.00 71 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv v8, v16, v24 +# CHECK-NEXT: 1 71 64.00 71 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf v8, v16, fs0 +# CHECK-NEXT: 1 71 64.00 71 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv v8, v16, v24 +# CHECK-NEXT: 1 71 64.00 71 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf v8, v16, fs0 +# CHECK-NEXT: 1 71 64.00 71 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf v8, v16, fs0 +# CHECK-NEXT: 1 71 64.00 71 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv v8, v16, v24 +# CHECK-NEXT: 1 71 64.00 71 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf v8, v16, fs0 # CHECK-NEXT: 1 912 912.00 912 VLEN1024X300SiFive7VA1[1,913],VLEN1024X300SiFive7VA1OrVA2[1,913],VLEN1024X300SiFive7VCQ VFDIV_VV vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 912 912.00 912 VLEN1024X300SiFive7VA1[1,913],VLEN1024X300SiFive7VA1OrVA2[1,913],VLEN1024X300SiFive7VCQ VFDIV_VF vfdiv.vf v8, v16, fs0 # CHECK-NEXT: 1 912 912.00 912 VLEN1024X300SiFive7VA1[1,913],VLEN1024X300SiFive7VA1OrVA2[1,913],VLEN1024X300SiFive7VCQ VFRDIV_VF vfrdiv.vf v8, v16, fs0 @@ -2474,22 +2474,22 @@ vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 912 912.00 912 VLEN1024X300SiFive7VA1[1,913],VLEN1024X300SiFive7VA1OrVA2[1,913],VLEN1024X300SiFive7VCQ VFSQRT_V vfsqrt.v v8, v24 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFRSQRT7_V vfrsqrt7.v v8, v24 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFREC7_V vfrec7.v v8, v24 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv v8, v16, v24 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf v8, v16, fs0 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv v8, v16, v24 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf v8, v16, fs0 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv v8, v16, v24 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf v8, v16, fs0 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv v8, v16, v24 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf v8, v16, fs0 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv v8, v16, v24 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf v8, v16, fs0 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv v8, v16, v24 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf v8, v16, fs0 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv v8, v16, v24 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf v8, v16, fs0 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv v8, v16, v24 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf v8, v16, fs0 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv v8, v16, v24 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf v8, v16, fs0 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv v8, v16, v24 +# CHECK-NEXT: 1 67 64.00 67 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf v8, v16, fs0 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFCVT_XU_F_V vfcvt.xu.f.v v8, v16 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFCVT_X_F_V vfcvt.x.f.v v8, v16 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFCVT_RTZ_XU_F_V vfcvt.rtz.xu.f.v v8, v16 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFCVT_RTZ_X_F_V vfcvt.rtz.x.f.v v8, v16 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v v8, v16 -# CHECK-NEXT: 1 64 64.00 64 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v v8, v16 +# CHECK-NEXT: 1 71 64.00 71 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v v8, v16 +# CHECK-NEXT: 1 71 64.00 71 VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v v8, v16 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_XU_F_V vfwcvt.xu.f.v v8, v16 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_X_F_V vfwcvt.x.f.v v8, v16 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_RTZ_XU_F_V vfwcvt.rtz.xu.f.v v8, v16 @@ -2506,13 +2506,13 @@ vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_F_F_W vfncvt.f.f.w v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_ROD_F_F_W vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 3 1.00 U 1 VLEN1024X300SiFive7PipeA,VLEN1024X300SiFive7PipeAB VSETVLI vsetvli zero, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv v8, v16, v24 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf v8, v16, fs0 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv v8, v16, v24 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf v8, v16, fs0 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf v8, v16, fs0 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv v8, v16, v24 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf v8, v16, fs0 +# CHECK-NEXT: 1 135 128.00 135 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv v8, v16, v24 +# CHECK-NEXT: 1 135 128.00 135 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf v8, v16, fs0 +# CHECK-NEXT: 1 135 128.00 135 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv v8, v16, v24 +# CHECK-NEXT: 1 135 128.00 135 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf v8, v16, fs0 +# CHECK-NEXT: 1 135 128.00 135 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf v8, v16, fs0 +# CHECK-NEXT: 1 135 128.00 135 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv v8, v16, v24 +# CHECK-NEXT: 1 135 128.00 135 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf v8, v16, fs0 # CHECK-NEXT: 1 1824 1824.00 1824 VLEN1024X300SiFive7VA1[1,1825],VLEN1024X300SiFive7VA1OrVA2[1,1825],VLEN1024X300SiFive7VCQ VFDIV_VV vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1824 1824.00 1824 VLEN1024X300SiFive7VA1[1,1825],VLEN1024X300SiFive7VA1OrVA2[1,1825],VLEN1024X300SiFive7VCQ VFDIV_VF vfdiv.vf v8, v16, fs0 # CHECK-NEXT: 1 1824 1824.00 1824 VLEN1024X300SiFive7VA1[1,1825],VLEN1024X300SiFive7VA1OrVA2[1,1825],VLEN1024X300SiFive7VCQ VFRDIV_VF vfrdiv.vf v8, v16, fs0 @@ -2535,22 +2535,22 @@ vfncvt.rod.f.f.w v8, v16 # CHECK-NEXT: 1 1824 1824.00 1824 VLEN1024X300SiFive7VA1[1,1825],VLEN1024X300SiFive7VA1OrVA2[1,1825],VLEN1024X300SiFive7VCQ VFSQRT_V vfsqrt.v v8, v24 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFRSQRT7_V vfrsqrt7.v v8, v24 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFREC7_V vfrec7.v v8, v24 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv v8, v16, v24 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf v8, v16, fs0 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv v8, v16, v24 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf v8, v16, fs0 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv v8, v16, v24 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf v8, v16, fs0 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv v8, v16, v24 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf v8, v16, fs0 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv v8, v16, v24 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf v8, v16, fs0 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv v8, v16, v24 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf v8, v16, fs0 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv v8, v16, v24 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf v8, v16, fs0 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv v8, v16, v24 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf v8, v16, fs0 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv v8, v16, v24 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf v8, v16, fs0 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv v8, v16, v24 +# CHECK-NEXT: 1 131 128.00 131 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf v8, v16, fs0 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_XU_F_V vfcvt.xu.f.v v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_X_F_V vfcvt.x.f.v v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_RTZ_XU_F_V vfcvt.rtz.xu.f.v v8, v16 # CHECK-NEXT: 1 8 16.00 8 VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_RTZ_X_F_V vfcvt.rtz.x.f.v v8, v16 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v v8, v16 -# CHECK-NEXT: 1 128 128.00 128 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v v8, v16 +# CHECK-NEXT: 1 135 128.00 135 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v v8, v16 +# CHECK-NEXT: 1 135 128.00 135 VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v v8, v16 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_XU_F_V vfwcvt.xu.f.v v8, v16 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_X_F_V vfwcvt.x.f.v v8, v16 # CHECK-NEXT: 1 8 8.00 8 VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_RTZ_XU_F_V vfwcvt.rtz.xu.f.v v8, v16 diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 7551a80..f04b256 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -44,6 +44,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/InitLLVM.h" +#include "llvm/Support/PGOOptions.h" #include "llvm/Support/PluginLoader.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" @@ -243,6 +244,39 @@ static cl::opt<RunPassOption, true, cl::parser<std::string>> RunPass( cl::desc("Run compiler only for specified passes (comma separated list)"), cl::value_desc("pass-name"), cl::location(RunPassOpt)); +// PGO command line options +enum PGOKind { + NoPGO, + SampleUse, +}; + +static cl::opt<PGOKind> + PGOKindFlag("pgo-kind", cl::init(NoPGO), cl::Hidden, + cl::desc("The kind of profile guided optimization"), + cl::values(clEnumValN(NoPGO, "nopgo", "Do not use PGO."), + clEnumValN(SampleUse, "pgo-sample-use-pipeline", + "Use sampled profile to guide PGO."))); + +// Function to set PGO options on TargetMachine based on command line flags. +static void setPGOOptions(TargetMachine &TM) { + std::optional<PGOOptions> PGOOpt; + + switch (PGOKindFlag) { + case SampleUse: + // Use default values for other PGOOptions parameters. This parameter + // is used to test that PGO data is preserved at -O0. + PGOOpt = PGOOptions("", "", "", "", PGOOptions::SampleUse, + PGOOptions::NoCSAction); + break; + case NoPGO: + PGOOpt = std::nullopt; + break; + } + + if (PGOOpt) + TM.setPGOOption(PGOOpt); +} + static int compileModule(char **, LLVMContext &); [[noreturn]] static void reportError(Twine Msg, StringRef Filename = "") { @@ -558,6 +592,9 @@ static int compileModule(char **argv, LLVMContext &Context) { TheTriple, CPUStr, FeaturesStr, Options, RM, CM, OLvl)); assert(Target && "Could not allocate target machine!"); + // Set PGO options based on command line flags + setPGOOptions(*Target); + return Target->createDataLayout().getStringRepresentation(); }; if (InputLanguage == "mir" || @@ -601,6 +638,9 @@ static int compileModule(char **argv, LLVMContext &Context) { TheTriple, CPUStr, FeaturesStr, Options, RM, CM, OLvl)); assert(Target && "Could not allocate target machine!"); + // Set PGO options based on command line flags + setPGOOptions(*Target); + // If we don't have a module then just exit now. We do this down // here since the CPU/Feature help is underneath the target machine // creation. diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp index 434449c..1031932 100644 --- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp +++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp @@ -253,25 +253,17 @@ public: break; } case BasicBlockLevel: { - const auto &BBVecMap = Emb->getBBVecMap(); for (const BasicBlock &BB : F) { - auto It = BBVecMap.find(&BB); - if (It != BBVecMap.end()) { - OS << BB.getName() << ":"; - It->second.print(OS); - } + OS << BB.getName() << ":"; + Emb->getBBVector(BB).print(OS); } break; } case InstructionLevel: { - const auto &InstMap = Emb->getInstVecMap(); for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { - auto It = InstMap.find(&I); - if (It != InstMap.end()) { - I.print(OS); - It->second.print(OS); - } + I.print(OS); + Emb->getInstVector(I).print(OS); } } break; diff --git a/llvm/unittests/ADT/BitsetTest.cpp b/llvm/unittests/ADT/BitsetTest.cpp new file mode 100644 index 0000000..8877397 --- /dev/null +++ b/llvm/unittests/ADT/BitsetTest.cpp @@ -0,0 +1,44 @@ +//===- llvm/unittest/Support/BitsetTest.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Bitset.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +template <unsigned NumBits> +class TestBitsetUInt64Array : public Bitset<NumBits> { + static constexpr unsigned NumElts = (NumBits + 63) / 64; + +public: + TestBitsetUInt64Array(const std::array<uint64_t, NumElts> &B) + : Bitset<NumBits>(B) {} + + bool verifyValue(const std::array<uint64_t, NumElts> &B) const { + for (unsigned I = 0; I != NumBits; ++I) { + bool ReferenceVal = + (B[(I / 64)] & (static_cast<uint64_t>(1) << (I % 64))) != 0; + if (ReferenceVal != this->test(I)) + return false; + } + + return true; + } +}; + +TEST(BitsetTest, Construction) { + std::array<uint64_t, 2> TestVals = {0x123456789abcdef3, 0x1337d3a0b22c24}; + TestBitsetUInt64Array<96> Test(TestVals); + EXPECT_TRUE(Test.verifyValue(TestVals)); + + TestBitsetUInt64Array<65> Test1(TestVals); + EXPECT_TRUE(Test1.verifyValue(TestVals)); +} +} // namespace diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt index dafd735..848ccba 100644 --- a/llvm/unittests/ADT/CMakeLists.txt +++ b/llvm/unittests/ADT/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_unittest(ADTTests BitFieldsTest.cpp BitmaskEnumTest.cpp BitTest.cpp + BitsetTest.cpp BitVectorTest.cpp BreadthFirstIteratorTest.cpp BumpPtrListTest.cpp diff --git a/llvm/unittests/Analysis/IR2VecTest.cpp b/llvm/unittests/Analysis/IR2VecTest.cpp index 40b4aa2..8ffc5f6 100644 --- a/llvm/unittests/Analysis/IR2VecTest.cpp +++ b/llvm/unittests/Analysis/IR2VecTest.cpp @@ -30,7 +30,9 @@ namespace { class TestableEmbedder : public Embedder { public: TestableEmbedder(const Function &F, const Vocabulary &V) : Embedder(F, V) {} - void computeEmbeddings(const BasicBlock &BB) const override {} + Embedding computeEmbeddings(const Instruction &I) const override { + return Embedding(); + } }; TEST(EmbeddingTest, ConstructorsAndAccessors) { @@ -321,18 +323,12 @@ protected: } }; -TEST_F(IR2VecTestFixture, GetInstVecMap_Symbolic) { +TEST_F(IR2VecTestFixture, GetInstVec_Symbolic) { auto Emb = Embedder::create(IR2VecKind::Symbolic, *F, *V); ASSERT_TRUE(static_cast<bool>(Emb)); - const auto &InstMap = Emb->getInstVecMap(); - - EXPECT_EQ(InstMap.size(), 2u); - EXPECT_TRUE(InstMap.count(AddInst)); - EXPECT_TRUE(InstMap.count(RetInst)); - - const auto &AddEmb = InstMap.at(AddInst); - const auto &RetEmb = InstMap.at(RetInst); + const auto &AddEmb = Emb->getInstVector(*AddInst); + const auto &RetEmb = Emb->getInstVector(*RetInst); EXPECT_EQ(AddEmb.size(), 2u); EXPECT_EQ(RetEmb.size(), 2u); @@ -340,51 +336,17 @@ TEST_F(IR2VecTestFixture, GetInstVecMap_Symbolic) { EXPECT_TRUE(RetEmb.approximatelyEquals(Embedding(2, 15.5))); } -TEST_F(IR2VecTestFixture, GetInstVecMap_FlowAware) { - auto Emb = Embedder::create(IR2VecKind::FlowAware, *F, *V); - ASSERT_TRUE(static_cast<bool>(Emb)); - - const auto &InstMap = Emb->getInstVecMap(); - - EXPECT_EQ(InstMap.size(), 2u); - EXPECT_TRUE(InstMap.count(AddInst)); - EXPECT_TRUE(InstMap.count(RetInst)); - - EXPECT_EQ(InstMap.at(AddInst).size(), 2u); - EXPECT_EQ(InstMap.at(RetInst).size(), 2u); - - EXPECT_TRUE(InstMap.at(AddInst).approximatelyEquals(Embedding(2, 25.5))); - EXPECT_TRUE(InstMap.at(RetInst).approximatelyEquals(Embedding(2, 32.6))); -} - -TEST_F(IR2VecTestFixture, GetBBVecMap_Symbolic) { - auto Emb = Embedder::create(IR2VecKind::Symbolic, *F, *V); - ASSERT_TRUE(static_cast<bool>(Emb)); - - const auto &BBMap = Emb->getBBVecMap(); - - EXPECT_EQ(BBMap.size(), 1u); - EXPECT_TRUE(BBMap.count(BB)); - EXPECT_EQ(BBMap.at(BB).size(), 2u); - - // BB vector should be sum of add and ret: {25.5, 25.5} + {15.5, 15.5} = - // {41.0, 41.0} - EXPECT_TRUE(BBMap.at(BB).approximatelyEquals(Embedding(2, 41.0))); -} - -TEST_F(IR2VecTestFixture, GetBBVecMap_FlowAware) { +TEST_F(IR2VecTestFixture, GetInstVec_FlowAware) { auto Emb = Embedder::create(IR2VecKind::FlowAware, *F, *V); ASSERT_TRUE(static_cast<bool>(Emb)); - const auto &BBMap = Emb->getBBVecMap(); - - EXPECT_EQ(BBMap.size(), 1u); - EXPECT_TRUE(BBMap.count(BB)); - EXPECT_EQ(BBMap.at(BB).size(), 2u); + const auto &AddEmb = Emb->getInstVector(*AddInst); + const auto &RetEmb = Emb->getInstVector(*RetInst); + EXPECT_EQ(AddEmb.size(), 2u); + EXPECT_EQ(RetEmb.size(), 2u); - // BB vector should be sum of add and ret: {25.5, 25.5} + {32.6, 32.6} = - // {58.1, 58.1} - EXPECT_TRUE(BBMap.at(BB).approximatelyEquals(Embedding(2, 58.1))); + EXPECT_TRUE(AddEmb.approximatelyEquals(Embedding(2, 25.5))); + EXPECT_TRUE(RetEmb.approximatelyEquals(Embedding(2, 32.6))); } TEST_F(IR2VecTestFixture, GetBBVector_Symbolic) { @@ -394,6 +356,8 @@ TEST_F(IR2VecTestFixture, GetBBVector_Symbolic) { const auto &BBVec = Emb->getBBVector(*BB); EXPECT_EQ(BBVec.size(), 2u); + // BB vector should be sum of add and ret: {25.5, 25.5} + {15.5, 15.5} = + // {41.0, 41.0} EXPECT_TRUE(BBVec.approximatelyEquals(Embedding(2, 41.0))); } @@ -404,6 +368,8 @@ TEST_F(IR2VecTestFixture, GetBBVector_FlowAware) { const auto &BBVec = Emb->getBBVector(*BB); EXPECT_EQ(BBVec.size(), 2u); + // BB vector should be sum of add and ret: {25.5, 25.5} + {32.6, 32.6} = + // {58.1, 58.1} EXPECT_TRUE(BBVec.approximatelyEquals(Embedding(2, 58.1))); } @@ -446,15 +412,9 @@ TEST_F(IR2VecTestFixture, MultipleComputeEmbeddingsConsistency_Symbolic) { EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec3)); EXPECT_TRUE(FuncVec2.approximatelyEquals(FuncVec3)); - // Also check that instruction vectors remain consistent - const auto &InstMap1 = Emb->getInstVecMap(); - const auto &InstMap2 = Emb->getInstVecMap(); - - EXPECT_EQ(InstMap1.size(), InstMap2.size()); - for (const auto &[Inst, Vec1] : InstMap1) { - ASSERT_TRUE(InstMap2.count(Inst)); - EXPECT_TRUE(Vec1.approximatelyEquals(InstMap2.at(Inst))); - } + Emb->invalidateEmbeddings(); + const auto &FuncVec4 = Emb->getFunctionVector(); + EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec4)); } TEST_F(IR2VecTestFixture, MultipleComputeEmbeddingsConsistency_FlowAware) { @@ -473,15 +433,9 @@ TEST_F(IR2VecTestFixture, MultipleComputeEmbeddingsConsistency_FlowAware) { EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec3)); EXPECT_TRUE(FuncVec2.approximatelyEquals(FuncVec3)); - // Also check that instruction vectors remain consistent - const auto &InstMap1 = Emb->getInstVecMap(); - const auto &InstMap2 = Emb->getInstVecMap(); - - EXPECT_EQ(InstMap1.size(), InstMap2.size()); - for (const auto &[Inst, Vec1] : InstMap1) { - ASSERT_TRUE(InstMap2.count(Inst)); - EXPECT_TRUE(Vec1.approximatelyEquals(InstMap2.at(Inst))); - } + Emb->invalidateEmbeddings(); + const auto &FuncVec4 = Emb->getFunctionVector(); + EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec4)); } static constexpr unsigned MaxOpcodes = Vocabulary::MaxOpcodes; diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index 255f62d..1436f0f 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -767,4 +767,39 @@ TEST_F(ConstantFPRangeTest, makeExactFCmpRegion) { } } +TEST_F(ConstantFPRangeTest, abs) { + EXPECT_EQ(Full.abs(), + ConstantFPRange(APFloat::getZero(Sem, /*Negative=*/false), + APFloat::getInf(Sem, /*Negative=*/false), + /*MayBeQNaN=*/true, + /*MayBeSNaN=*/true)); + EXPECT_EQ(Empty.abs(), Empty); + EXPECT_EQ(Zero.abs(), PosZero); + EXPECT_EQ(PosInf.abs(), PosInf); + EXPECT_EQ(NegInf.abs(), PosInf); + EXPECT_EQ(Some.abs(), SomePos); + EXPECT_EQ(SomeNeg.abs(), SomePos); + EXPECT_EQ(NaN.abs(), NaN); + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-2.0), APFloat(3.0)).abs(), + ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(3.0))); + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(2.0)).abs(), + ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(3.0))); +} + +TEST_F(ConstantFPRangeTest, negate) { + EXPECT_EQ(Full.negate(), Full); + EXPECT_EQ(Empty.negate(), Empty); + EXPECT_EQ(Zero.negate(), Zero); + EXPECT_EQ(PosInf.negate(), NegInf); + EXPECT_EQ(NegInf.negate(), PosInf); + EXPECT_EQ(Some.negate(), Some); + EXPECT_EQ(SomePos.negate(), SomeNeg); + EXPECT_EQ(SomeNeg.negate(), SomePos); + EXPECT_EQ(NaN.negate(), NaN); + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-2.0), APFloat(3.0)).negate(), + ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(2.0))); + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(2.0)).negate(), + ConstantFPRange::getNonNaN(APFloat(-2.0), APFloat(3.0))); +} + } // anonymous namespace diff --git a/llvm/unittests/Support/MustacheTest.cpp b/llvm/unittests/Support/MustacheTest.cpp index e2c4422..3cad4a4 100644 --- a/llvm/unittests/Support/MustacheTest.cpp +++ b/llvm/unittests/Support/MustacheTest.cpp @@ -22,7 +22,10 @@ using namespace llvm::json; TEST(MustacheInterpolation, NoInterpolation) { // Mustache-free templates should render as-is. Value D = {}; - Template T("Hello from {Mustache}!\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello from {Mustache}!\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -32,7 +35,10 @@ TEST(MustacheInterpolation, NoInterpolation) { TEST(MustacheInterpolation, BasicInterpolation) { // Unadorned tags should interpolate content into the template. Value D = Object{{"subject", "World"}}; - Template T("Hello, {{subject}}!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello, {{subject}}!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -42,7 +48,10 @@ TEST(MustacheInterpolation, BasicInterpolation) { TEST(MustacheInterpolation, NoReinterpolation) { // Interpolated tag output should not be re-interpolated. Value D = Object{{"template", "{{planet}}"}, {"planet", "Earth"}}; - Template T("{{template}}: {{planet}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{template}}: {{planet}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -54,7 +63,10 @@ TEST(MustacheInterpolation, HTMLEscaping) { Value D = Object{ {"forbidden", "& \" < >"}, }; - Template T("These characters should be HTML escaped: {{forbidden}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("These characters should be HTML escaped: {{forbidden}}\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -67,7 +79,11 @@ TEST(MustacheInterpolation, Ampersand) { Value D = Object{ {"forbidden", "& \" < >"}, }; - Template T("These characters should not be HTML escaped: {{&forbidden}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("These characters should not be HTML escaped: {{&forbidden}}\n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -77,7 +93,10 @@ TEST(MustacheInterpolation, Ampersand) { TEST(MustacheInterpolation, BasicIntegerInterpolation) { // Integers should interpolate seamlessly. Value D = Object{{"mph", 85}}; - Template T("{{mph}} miles an hour!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{mph}} miles an hour!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -87,7 +106,10 @@ TEST(MustacheInterpolation, BasicIntegerInterpolation) { TEST(MustacheInterpolation, AmpersandIntegerInterpolation) { // Integers should interpolate seamlessly. Value D = Object{{"mph", 85}}; - Template T("{{&mph}} miles an hour!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{&mph}} miles an hour!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -97,7 +119,10 @@ TEST(MustacheInterpolation, AmpersandIntegerInterpolation) { TEST(MustacheInterpolation, BasicDecimalInterpolation) { // Decimals should interpolate seamlessly with proper significance. Value D = Object{{"power", 1.21}}; - Template T("{{power}} jiggawatts!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{power}} jiggawatts!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -107,7 +132,10 @@ TEST(MustacheInterpolation, BasicDecimalInterpolation) { TEST(MustacheInterpolation, BasicNullInterpolation) { // Nulls should interpolate as the empty string. Value D = Object{{"cannot", nullptr}}; - Template T("I ({{cannot}}) be seen!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("I ({{cannot}}) be seen!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -117,7 +145,10 @@ TEST(MustacheInterpolation, BasicNullInterpolation) { TEST(MustacheInterpolation, AmpersandNullInterpolation) { // Nulls should interpolate as the empty string. Value D = Object{{"cannot", nullptr}}; - Template T("I ({{&cannot}}) be seen!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("I ({{&cannot}}) be seen!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -127,7 +158,10 @@ TEST(MustacheInterpolation, AmpersandNullInterpolation) { TEST(MustacheInterpolation, BasicContextMissInterpolation) { // Failed context lookups should default to empty strings. Value D = Object{}; - Template T("I ({{cannot}}) be seen!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("I ({{cannot}}) be seen!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -137,7 +171,10 @@ TEST(MustacheInterpolation, BasicContextMissInterpolation) { TEST(MustacheInterpolation, DottedNamesBasicInterpolation) { // Dotted names should be considered a form of shorthand for sections. Value D = Object{{"person", Object{{"name", "Joe"}}}}; - Template T("{{person.name}} == {{#person}}{{name}}{{/person}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{person.name}} == {{#person}}{{name}}{{/person}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -147,7 +184,10 @@ TEST(MustacheInterpolation, DottedNamesBasicInterpolation) { TEST(MustacheInterpolation, DottedNamesAmpersandInterpolation) { // Dotted names should be considered a form of shorthand for sections. Value D = Object{{"person", Object{{"name", "Joe"}}}}; - Template T("{{&person.name}} == {{#person}}{{&name}}{{/person}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{&person.name}} == {{#person}}{{&name}}{{/person}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -162,7 +202,10 @@ TEST(MustacheInterpolation, DottedNamesArbitraryDepth) { Object{{"c", Object{{"d", Object{{"e", Object{{"name", "Phil"}}}}}}}}}}}}; - Template T("{{a.b.c.d.e.name}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{a.b.c.d.e.name}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -172,7 +215,10 @@ TEST(MustacheInterpolation, DottedNamesArbitraryDepth) { TEST(MustacheInterpolation, DottedNamesBrokenChains) { // Any falsey value prior to the last part of the name should yield ''. Value D = Object{{"a", Object{}}}; - Template T("{{a.b.c}} == "); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{a.b.c}} == ", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -183,7 +229,10 @@ TEST(MustacheInterpolation, DottedNamesBrokenChainResolution) { // Each part of a dotted name should resolve only against its parent. Value D = Object{{"a", Object{{"b", Object{}}}}, {"c", Object{{"name", "Jim"}}}}; - Template T("{{a.b.c.name}} == "); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{a.b.c.name}} == ", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -200,7 +249,10 @@ TEST(MustacheInterpolation, DottedNamesInitialResolution) { Object{{"d", Object{{"e", Object{{"name", "Phil"}}}}}}}}}}}, {"b", Object{{"c", Object{{"d", Object{{"e", Object{{"name", "Wrong"}}}}}}}}}}; - Template T("{{#a}}{{b.c.d.e.name}}{{/a}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#a}}{{b.c.d.e.name}}{{/a}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -211,7 +263,10 @@ TEST(MustacheInterpolation, DottedNamesContextPrecedence) { // Dotted names should be resolved against former resolutions. Value D = Object{{"a", Object{{"b", Object{}}}}, {"b", Object{{"c", "ERROR"}}}}; - Template T("{{#a}}{{b.c}}{{/a}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#a}}{{b.c}}{{/a}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -221,7 +276,10 @@ TEST(MustacheInterpolation, DottedNamesContextPrecedence) { TEST(MustacheInterpolation, DottedNamesAreNotSingleKeys) { // Dotted names shall not be parsed as single, atomic keys Value D = Object{{"a.b", "c"}}; - Template T("{{a.b}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{a.b}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -231,7 +289,10 @@ TEST(MustacheInterpolation, DottedNamesAreNotSingleKeys) { TEST(MustacheInterpolation, DottedNamesNoMasking) { // Dotted Names in a given context are unavailable due to dot splitting Value D = Object{{"a.b", "c"}, {"a", Object{{"b", "d"}}}}; - Template T("{{a.b}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{a.b}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -241,7 +302,10 @@ TEST(MustacheInterpolation, DottedNamesNoMasking) { TEST(MustacheInterpolation, ImplicitIteratorsBasicInterpolation) { // Unadorned tags should interpolate content into the template. Value D = "world"; - Template T("Hello, {{.}}!\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello, {{.}}!\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -251,7 +315,10 @@ TEST(MustacheInterpolation, ImplicitIteratorsBasicInterpolation) { TEST(MustacheInterpolation, ImplicitIteratorsAmersand) { // Basic interpolation should be HTML escaped. Value D = "& \" < >"; - Template T("These characters should not be HTML escaped: {{&.}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("These characters should not be HTML escaped: {{&.}}\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -261,7 +328,10 @@ TEST(MustacheInterpolation, ImplicitIteratorsAmersand) { TEST(MustacheInterpolation, ImplicitIteratorsInteger) { // Integers should interpolate seamlessly. Value D = 85; - Template T("{{.}} miles an hour!\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{.}} miles an hour!\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -271,7 +341,10 @@ TEST(MustacheInterpolation, ImplicitIteratorsInteger) { TEST(MustacheInterpolation, InterpolationSurroundingWhitespace) { // Interpolation should not alter surrounding whitespace. Value D = Object{{"string", "---"}}; - Template T("| {{string}} |"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| {{string}} |", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -281,7 +354,10 @@ TEST(MustacheInterpolation, InterpolationSurroundingWhitespace) { TEST(MustacheInterpolation, AmersandSurroundingWhitespace) { // Interpolation should not alter surrounding whitespace. Value D = Object{{"string", "---"}}; - Template T("| {{&string}} |"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| {{&string}} |", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -291,7 +367,10 @@ TEST(MustacheInterpolation, AmersandSurroundingWhitespace) { TEST(MustacheInterpolation, StandaloneInterpolationWithWhitespace) { // Standalone interpolation should not alter surrounding whitespace. Value D = Object{{"string", "---"}}; - Template T(" {{string}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{string}}\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -301,7 +380,10 @@ TEST(MustacheInterpolation, StandaloneInterpolationWithWhitespace) { TEST(MustacheInterpolation, StandaloneAmpersandWithWhitespace) { // Standalone interpolation should not alter surrounding whitespace. Value D = Object{{"string", "---"}}; - Template T(" {{&string}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{&string}}\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -311,7 +393,10 @@ TEST(MustacheInterpolation, StandaloneAmpersandWithWhitespace) { TEST(MustacheInterpolation, InterpolationWithPadding) { // Superfluous in-tag whitespace should be ignored. Value D = Object{{"string", "---"}}; - Template T("|{{ string }}|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|{{ string }}|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -321,7 +406,10 @@ TEST(MustacheInterpolation, InterpolationWithPadding) { TEST(MustacheInterpolation, AmpersandWithPadding) { // Superfluous in-tag whitespace should be ignored. Value D = Object{{"string", "---"}}; - Template T("|{{& string }}|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|{{& string }}|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -331,7 +419,10 @@ TEST(MustacheInterpolation, AmpersandWithPadding) { TEST(MustacheInterpolation, InterpolationWithPaddingAndNewlines) { // Superfluous in-tag whitespace should be ignored. Value D = Object{{"string", "---"}}; - Template T("|{{ string \n\n\n }}|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|{{ string \n\n\n }}|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -340,7 +431,10 @@ TEST(MustacheInterpolation, InterpolationWithPaddingAndNewlines) { TEST(MustacheSections, Truthy) { Value D = Object{{"boolean", true}}; - Template T("{{#boolean}}This should be rendered.{{/boolean}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#boolean}}This should be rendered.{{/boolean}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -349,7 +443,10 @@ TEST(MustacheSections, Truthy) { TEST(MustacheSections, Falsey) { Value D = Object{{"boolean", false}}; - Template T("{{#boolean}}This should not be rendered.{{/boolean}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#boolean}}This should not be rendered.{{/boolean}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -359,7 +456,10 @@ TEST(MustacheSections, Falsey) { TEST(MustacheInterpolation, IsFalseyNull) { // Mustache-free templates should render as-is. Value D = Object{{"boolean", nullptr}}; - Template T("Hello, {{#boolean}}World{{/boolean}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello, {{#boolean}}World{{/boolean}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -369,7 +469,10 @@ TEST(MustacheInterpolation, IsFalseyNull) { TEST(MustacheInterpolation, IsFalseyArray) { // Mustache-free templates should render as-is. Value D = Object{{"boolean", Array()}}; - Template T("Hello, {{#boolean}}World{{/boolean}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello, {{#boolean}}World{{/boolean}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -379,7 +482,10 @@ TEST(MustacheInterpolation, IsFalseyArray) { TEST(MustacheInterpolation, IsFalseyObject) { // Mustache-free templates should render as-is. Value D = Object{{"boolean", Object{}}}; - Template T("Hello, {{#boolean}}World{{/boolean}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello, {{#boolean}}World{{/boolean}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -389,7 +495,10 @@ TEST(MustacheInterpolation, IsFalseyObject) { TEST(MustacheInterpolation, DoubleRendering) { // Mustache-free templates should render as-is. Value D1 = Object{{"subject", "World"}}; - Template T("Hello, {{subject}}!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello, {{subject}}!", Ctx); std::string Out1; raw_string_ostream OS1(Out1); T.render(D1, OS1); @@ -403,7 +512,10 @@ TEST(MustacheInterpolation, DoubleRendering) { TEST(MustacheSections, NullIsFalsey) { Value D = Object{{"null", nullptr}}; - Template T("{{#null}}This should not be rendered.{{/null}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#null}}This should not be rendered.{{/null}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -412,7 +524,10 @@ TEST(MustacheSections, NullIsFalsey) { TEST(MustacheSections, Context) { Value D = Object{{"context", Object{{"name", "Joe"}}}}; - Template T("{{#context}}Hi {{name}}.{{/context}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#context}}Hi {{name}}.{{/context}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -424,7 +539,10 @@ TEST(MustacheSections, ParentContexts) { {"b", "wrong"}, {"sec", Object{{"b", "bar"}}}, {"c", Object{{"d", "baz"}}}}; - Template T("{{#sec}}{{a}}, {{b}}, {{c.d}}{{/sec}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#sec}}{{a}}, {{b}}, {{c.d}}{{/sec}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -433,7 +551,10 @@ TEST(MustacheSections, ParentContexts) { TEST(MustacheSections, VariableTest) { Value D = Object{{"foo", "bar"}}; - Template T("{{#foo}}{{.}} is {{foo}}{{/foo}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#foo}}{{.}} is {{foo}}{{/foo}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -449,6 +570,9 @@ TEST(MustacheSections, ListContexts) { Array{Object{{"mname", "1"}, {"bottoms", Array{Object{{"bname", "x"}}, Object{{"bname", "y"}}}}}}}}}}}; + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); Template T("{{#tops}}" "{{#middles}}" "{{tname.lower}}{{mname}}." @@ -456,7 +580,8 @@ TEST(MustacheSections, ListContexts) { "{{tname.upper}}{{mname}}{{bname}}." "{{/bottoms}}" "{{/middles}}" - "{{/tops}}"); + "{{/tops}}", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -468,6 +593,9 @@ TEST(MustacheSections, DeeplyNestedContexts) { {"a", Object{{"one", 1}}}, {"b", Object{{"two", 2}}}, {"c", Object{{"three", 3}, {"d", Object{{"four", 4}, {"five", 5}}}}}}; + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); Template T( "{{#a}}\n{{one}}\n{{#b}}\n{{one}}{{two}}{{one}}\n{{#c}}\n{{one}}{{two}}{{" "three}}{{two}}{{one}}\n{{#d}}\n{{one}}{{two}}{{three}}{{four}}{{three}}{" @@ -477,7 +605,8 @@ TEST(MustacheSections, DeeplyNestedContexts) { "four}}{{three}}{{two}}{{one}}\n{{/" "five}}\n{{one}}{{two}}{{three}}{{four}}{{three}}{{two}}{{one}}\n{{/" "d}}\n{{one}}{{two}}{{three}}{{two}}{{one}}\n{{/" - "c}}\n{{one}}{{two}}{{one}}\n{{/b}}\n{{one}}\n{{/a}}\n"); + "c}}\n{{one}}{{two}}{{one}}\n{{/b}}\n{{one}}\n{{/a}}\n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -489,7 +618,10 @@ TEST(MustacheSections, DeeplyNestedContexts) { TEST(MustacheSections, List) { Value D = Object{{"list", Array{Object{{"item", 1}}, Object{{"item", 2}}, Object{{"item", 3}}}}}; - Template T("{{#list}}{{item}}{{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#list}}{{item}}{{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -498,7 +630,10 @@ TEST(MustacheSections, List) { TEST(MustacheSections, EmptyList) { Value D = Object{{"list", Array{}}}; - Template T("{{#list}}Yay lists!{{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#list}}Yay lists!{{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -507,8 +642,12 @@ TEST(MustacheSections, EmptyList) { TEST(MustacheSections, Doubled) { Value D = Object{{"bool", true}, {"two", "second"}}; + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); Template T("{{#bool}}\n* first\n{{/bool}}\n* " - "{{two}}\n{{#bool}}\n* third\n{{/bool}}\n"); + "{{two}}\n{{#bool}}\n* third\n{{/bool}}\n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -517,7 +656,10 @@ TEST(MustacheSections, Doubled) { TEST(MustacheSections, NestedTruthy) { Value D = Object{{"bool", true}}; - Template T("| A {{#bool}}B {{#bool}}C{{/bool}} D{{/bool}} E |"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| A {{#bool}}B {{#bool}}C{{/bool}} D{{/bool}} E |", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -526,7 +668,10 @@ TEST(MustacheSections, NestedTruthy) { TEST(MustacheSections, NestedFalsey) { Value D = Object{{"bool", false}}; - Template T("| A {{#bool}}B {{#bool}}C{{/bool}} D{{/bool}} E |"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| A {{#bool}}B {{#bool}}C{{/bool}} D{{/bool}} E |", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -535,7 +680,10 @@ TEST(MustacheSections, NestedFalsey) { TEST(MustacheSections, ContextMisses) { Value D = Object{}; - Template T("[{{#missing}}Found key 'missing'!{{/missing}}]"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("[{{#missing}}Found key 'missing'!{{/missing}}]", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -544,7 +692,10 @@ TEST(MustacheSections, ContextMisses) { TEST(MustacheSections, ImplicitIteratorString) { Value D = Object{{"list", Array{"a", "b", "c", "d", "e"}}}; - Template T("{{#list}}({{.}}){{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#list}}({{.}}){{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -553,7 +704,10 @@ TEST(MustacheSections, ImplicitIteratorString) { TEST(MustacheSections, ImplicitIteratorInteger) { Value D = Object{{"list", Array{1, 2, 3, 4, 5}}}; - Template T("{{#list}}({{.}}){{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#list}}({{.}}){{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -562,7 +716,10 @@ TEST(MustacheSections, ImplicitIteratorInteger) { TEST(MustacheSections, ImplicitIteratorArray) { Value D = Object{{"list", Array{Array{1, 2, 3}, Array{"a", "b", "c"}}}}; - Template T("{{#list}}({{#.}}{{.}}{{/.}}){{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#list}}({{#.}}{{.}}{{/.}}){{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -571,7 +728,10 @@ TEST(MustacheSections, ImplicitIteratorArray) { TEST(MustacheSections, ImplicitIteratorHTMLEscaping) { Value D = Object{{"list", Array{"&", "\"", "<", ">"}}}; - Template T("{{#list}}({{.}}){{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#list}}({{.}}){{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -580,7 +740,10 @@ TEST(MustacheSections, ImplicitIteratorHTMLEscaping) { TEST(MustacheSections, ImplicitIteratorAmpersand) { Value D = Object{{"list", Array{"&", "\"", "<", ">"}}}; - Template T("{{#list}}({{&.}}){{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#list}}({{&.}}){{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -589,7 +752,10 @@ TEST(MustacheSections, ImplicitIteratorAmpersand) { TEST(MustacheSections, ImplicitIteratorRootLevel) { Value D = Array{Object{{"value", "a"}}, Object{{"value", "b"}}}; - Template T("{{#.}}({{value}}){{/.}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#.}}({{value}}){{/.}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -598,7 +764,10 @@ TEST(MustacheSections, ImplicitIteratorRootLevel) { TEST(MustacheSections, DottedNamesTruthy) { Value D = Object{{"a", Object{{"b", Object{{"c", true}}}}}}; - Template T("{{#a.b.c}}Here{{/a.b.c}} == Here"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#a.b.c}}Here{{/a.b.c}} == Here", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -607,7 +776,10 @@ TEST(MustacheSections, DottedNamesTruthy) { TEST(MustacheSections, DottedNamesFalsey) { Value D = Object{{"a", Object{{"b", Object{{"c", false}}}}}}; - Template T("{{#a.b.c}}Here{{/a.b.c}} == "); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#a.b.c}}Here{{/a.b.c}} == ", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -615,8 +787,11 @@ TEST(MustacheSections, DottedNamesFalsey) { } TEST(MustacheSections, DottedNamesBrokenChains) { - Value D = Object{{"a", Object{}}}; - Template T("{{#a.b.c}}Here{{/a.b.c}} == "); + Value D = Object{{"a", Object{{}}}}; + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#a.b.c}}Here{{/a.b.c}} == ", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -625,7 +800,10 @@ TEST(MustacheSections, DottedNamesBrokenChains) { TEST(MustacheSections, SurroundingWhitespace) { Value D = Object{{"boolean", true}}; - Template T(" | {{#boolean}}\t|\t{{/boolean}} | \n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" | {{#boolean}}\t|\t{{/boolean}} | \n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -634,7 +812,11 @@ TEST(MustacheSections, SurroundingWhitespace) { TEST(MustacheSections, InternalWhitespace) { Value D = Object{{"boolean", true}}; - Template T(" | {{#boolean}} {{! Important Whitespace }}\n {{/boolean}} | \n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" | {{#boolean}} {{! Important Whitespace }}\n {{/boolean}} | \n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -643,7 +825,11 @@ TEST(MustacheSections, InternalWhitespace) { TEST(MustacheSections, IndentedInlineSections) { Value D = Object{{"boolean", true}}; - Template T(" {{#boolean}}YES{{/boolean}}\n {{#boolean}}GOOD{{/boolean}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{#boolean}}YES{{/boolean}}\n {{#boolean}}GOOD{{/boolean}}\n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -652,7 +838,10 @@ TEST(MustacheSections, IndentedInlineSections) { TEST(MustacheSections, StandaloneLines) { Value D = Object{{"boolean", true}}; - Template T("| This Is\n{{#boolean}}\n|\n{{/boolean}}\n| A Line\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| This Is\n{{#boolean}}\n|\n{{/boolean}}\n| A Line\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -661,7 +850,10 @@ TEST(MustacheSections, StandaloneLines) { TEST(MustacheSections, IndentedStandaloneLines) { Value D = Object{{"boolean", true}}; - Template T("| This Is\n {{#boolean}}\n|\n {{/boolean}}\n| A Line\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| This Is\n {{#boolean}}\n|\n {{/boolean}}\n| A Line\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -670,7 +862,10 @@ TEST(MustacheSections, IndentedStandaloneLines) { TEST(MustacheSections, StandaloneLineEndings) { Value D = Object{{"boolean", true}}; - Template T("|\r\n{{#boolean}}\r\n{{/boolean}}\r\n|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|\r\n{{#boolean}}\r\n{{/boolean}}\r\n|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -679,7 +874,10 @@ TEST(MustacheSections, StandaloneLineEndings) { TEST(MustacheSections, StandaloneWithoutPreviousLine) { Value D = Object{{"boolean", true}}; - Template T(" {{#boolean}}\n#{{/boolean}}\n/"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{#boolean}}\n#{{/boolean}}\n/", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -688,7 +886,10 @@ TEST(MustacheSections, StandaloneWithoutPreviousLine) { TEST(MustacheSections, StandaloneWithoutNewline) { Value D = Object{{"boolean", true}}; - Template T("#{{#boolean}}\n/\n {{/boolean}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("#{{#boolean}}\n/\n {{/boolean}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -697,7 +898,10 @@ TEST(MustacheSections, StandaloneWithoutNewline) { TEST(MustacheSections, Padding) { Value D = Object{{"boolean", true}}; - Template T("|{{# boolean }}={{/ boolean }}|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|{{# boolean }}={{/ boolean }}|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -706,7 +910,10 @@ TEST(MustacheSections, Padding) { TEST(MustacheInvertedSections, Falsey) { Value D = Object{{"boolean", false}}; - Template T("{{^boolean}}This should be rendered.{{/boolean}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{^boolean}}This should be rendered.{{/boolean}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -715,7 +922,10 @@ TEST(MustacheInvertedSections, Falsey) { TEST(MustacheInvertedSections, Truthy) { Value D = Object{{"boolean", true}}; - Template T("{{^boolean}}This should not be rendered.{{/boolean}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{^boolean}}This should not be rendered.{{/boolean}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -724,7 +934,10 @@ TEST(MustacheInvertedSections, Truthy) { TEST(MustacheInvertedSections, NullIsFalsey) { Value D = Object{{"null", nullptr}}; - Template T("{{^null}}This should be rendered.{{/null}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{^null}}This should be rendered.{{/null}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -733,7 +946,10 @@ TEST(MustacheInvertedSections, NullIsFalsey) { TEST(MustacheInvertedSections, Context) { Value D = Object{{"context", Object{{"name", "Joe"}}}}; - Template T("{{^context}}Hi {{name}}.{{/context}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{^context}}Hi {{name}}.{{/context}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -743,7 +959,10 @@ TEST(MustacheInvertedSections, Context) { TEST(MustacheInvertedSections, List) { Value D = Object{ {"list", Array{Object{{"n", 1}}, Object{{"n", 2}}, Object{{"n", 3}}}}}; - Template T("{{^list}}{{n}}{{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{^list}}{{n}}{{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -752,7 +971,10 @@ TEST(MustacheInvertedSections, List) { TEST(MustacheInvertedSections, EmptyList) { Value D = Object{{"list", Array{}}}; - Template T("{{^list}}Yay lists!{{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{^list}}Yay lists!{{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -761,8 +983,12 @@ TEST(MustacheInvertedSections, EmptyList) { TEST(MustacheInvertedSections, Doubled) { Value D = Object{{"bool", false}, {"two", "second"}}; + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); Template T("{{^bool}}\n* first\n{{/bool}}\n* " - "{{two}}\n{{^bool}}\n* third\n{{/bool}}\n"); + "{{two}}\n{{^bool}}\n* third\n{{/bool}}\n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -771,7 +997,10 @@ TEST(MustacheInvertedSections, Doubled) { TEST(MustacheInvertedSections, NestedFalsey) { Value D = Object{{"bool", false}}; - Template T("| A {{^bool}}B {{^bool}}C{{/bool}} D{{/bool}} E |"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| A {{^bool}}B {{^bool}}C{{/bool}} D{{/bool}} E |", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -780,7 +1009,10 @@ TEST(MustacheInvertedSections, NestedFalsey) { TEST(MustacheInvertedSections, NestedTruthy) { Value D = Object{{"bool", true}}; - Template T("| A {{^bool}}B {{^bool}}C{{/bool}} D{{/bool}} E |"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| A {{^bool}}B {{^bool}}C{{/bool}} D{{/bool}} E |", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -789,7 +1021,10 @@ TEST(MustacheInvertedSections, NestedTruthy) { TEST(MustacheInvertedSections, ContextMisses) { Value D = Object{}; - Template T("[{{^missing}}Cannot find key 'missing'!{{/missing}}]"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("[{{^missing}}Cannot find key 'missing'!{{/missing}}]", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -798,7 +1033,10 @@ TEST(MustacheInvertedSections, ContextMisses) { TEST(MustacheInvertedSections, DottedNamesTruthy) { Value D = Object{{"a", Object{{"b", Object{{"c", true}}}}}}; - Template T("{{^a.b.c}}Not Here{{/a.b.c}} == "); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{^a.b.c}}Not Here{{/a.b.c}} == ", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -807,7 +1045,10 @@ TEST(MustacheInvertedSections, DottedNamesTruthy) { TEST(MustacheInvertedSections, DottedNamesFalsey) { Value D = Object{{"a", Object{{"b", Object{{"c", false}}}}}}; - Template T("{{^a.b.c}}Not Here{{/a.b.c}} == Not Here"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{^a.b.c}}Not Here{{/a.b.c}} == Not Here", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -816,7 +1057,10 @@ TEST(MustacheInvertedSections, DottedNamesFalsey) { TEST(MustacheInvertedSections, DottedNamesBrokenChains) { Value D = Object{{"a", Object{}}}; - Template T("{{^a.b.c}}Not Here{{/a.b.c}} == Not Here"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{^a.b.c}}Not Here{{/a.b.c}} == Not Here", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -825,7 +1069,10 @@ TEST(MustacheInvertedSections, DottedNamesBrokenChains) { TEST(MustacheInvertedSections, SurroundingWhitespace) { Value D = Object{{"boolean", false}}; - Template T(" | {{^boolean}}\t|\t{{/boolean}} | \n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" | {{^boolean}}\t|\t{{/boolean}} | \n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -834,7 +1081,11 @@ TEST(MustacheInvertedSections, SurroundingWhitespace) { TEST(MustacheInvertedSections, InternalWhitespace) { Value D = Object{{"boolean", false}}; - Template T(" | {{^boolean}} {{! Important Whitespace }}\n {{/boolean}} | \n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" | {{^boolean}} {{! Important Whitespace }}\n {{/boolean}} | \n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -843,7 +1094,11 @@ TEST(MustacheInvertedSections, InternalWhitespace) { TEST(MustacheInvertedSections, IndentedInlineSections) { Value D = Object{{"boolean", false}}; - Template T(" {{^boolean}}NO{{/boolean}}\n {{^boolean}}WAY{{/boolean}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{^boolean}}NO{{/boolean}}\n {{^boolean}}WAY{{/boolean}}\n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -852,7 +1107,10 @@ TEST(MustacheInvertedSections, IndentedInlineSections) { TEST(MustacheInvertedSections, StandaloneLines) { Value D = Object{{"boolean", false}}; - Template T("| This Is\n{{^boolean}}\n|\n{{/boolean}}\n| A Line\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| This Is\n{{^boolean}}\n|\n{{/boolean}}\n| A Line\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -861,7 +1119,10 @@ TEST(MustacheInvertedSections, StandaloneLines) { TEST(MustacheInvertedSections, StandaloneIndentedLines) { Value D = Object{{"boolean", false}}; - Template T("| This Is\n {{^boolean}}\n|\n {{/boolean}}\n| A Line\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| This Is\n {{^boolean}}\n|\n {{/boolean}}\n| A Line\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -870,7 +1131,10 @@ TEST(MustacheInvertedSections, StandaloneIndentedLines) { TEST(MustacheInvertedSections, StandaloneLineEndings) { Value D = Object{{"boolean", false}}; - Template T("|\r\n{{^boolean}}\r\n{{/boolean}}\r\n|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|\r\n{{^boolean}}\r\n{{/boolean}}\r\n|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -879,7 +1143,10 @@ TEST(MustacheInvertedSections, StandaloneLineEndings) { TEST(MustacheInvertedSections, StandaloneWithoutPreviousLine) { Value D = Object{{"boolean", false}}; - Template T(" {{^boolean}}\n^{{/boolean}}\n/"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{^boolean}}\n^{{/boolean}}\n/", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -888,7 +1155,10 @@ TEST(MustacheInvertedSections, StandaloneWithoutPreviousLine) { TEST(MustacheInvertedSections, StandaloneWithoutNewline) { Value D = Object{{"boolean", false}}; - Template T("^{{^boolean}}\n/\n {{/boolean}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("^{{^boolean}}\n/\n {{/boolean}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -897,7 +1167,10 @@ TEST(MustacheInvertedSections, StandaloneWithoutNewline) { TEST(MustacheInvertedSections, Padding) { Value D = Object{{"boolean", false}}; - Template T("|{{^ boolean }}={{/ boolean }}|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|{{^ boolean }}={{/ boolean }}|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -906,7 +1179,10 @@ TEST(MustacheInvertedSections, Padding) { TEST(MustachePartials, BasicBehavior) { Value D = Object{}; - Template T("{{>text}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{>text}}", Ctx); T.registerPartial("text", "from partial"); std::string Out; raw_string_ostream OS(Out); @@ -916,7 +1192,10 @@ TEST(MustachePartials, BasicBehavior) { TEST(MustachePartials, FailedLookup) { Value D = Object{}; - Template T("{{>text}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{>text}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -925,7 +1204,10 @@ TEST(MustachePartials, FailedLookup) { TEST(MustachePartials, Context) { Value D = Object{{"text", "content"}}; - Template T("{{>partial}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{>partial}}", Ctx); T.registerPartial("partial", "*{{text}}*"); std::string Out; raw_string_ostream OS(Out); @@ -937,7 +1219,10 @@ TEST(MustachePartials, Recursion) { Value D = Object{{"content", "X"}, {"nodes", Array{Object{{"content", "Y"}, {"nodes", Array{}}}}}}; - Template T("{{>node}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{>node}}", Ctx); T.registerPartial("node", "{{content}}({{#nodes}}{{>node}}{{/nodes}})"); std::string Out; raw_string_ostream OS(Out); @@ -947,7 +1232,10 @@ TEST(MustachePartials, Recursion) { TEST(MustachePartials, Nested) { Value D = Object{{"a", "hello"}, {"b", "world"}}; - Template T("{{>outer}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{>outer}}", Ctx); T.registerPartial("outer", "*{{a}} {{>inner}}*"); T.registerPartial("inner", "{{b}}!"); std::string Out; @@ -958,7 +1246,10 @@ TEST(MustachePartials, Nested) { TEST(MustachePartials, SurroundingWhitespace) { Value D = Object{}; - Template T("| {{>partial}} |"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| {{>partial}} |", Ctx); T.registerPartial("partial", "\t|\t"); std::string Out; raw_string_ostream OS(Out); @@ -968,7 +1259,10 @@ TEST(MustachePartials, SurroundingWhitespace) { TEST(MustachePartials, InlineIndentation) { Value D = Object{{"data", "|"}}; - Template T(" {{data}} {{> partial}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{data}} {{> partial}}\n", Ctx); T.registerPartial("partial", "<\n<"); std::string Out; raw_string_ostream OS(Out); @@ -978,7 +1272,10 @@ TEST(MustachePartials, InlineIndentation) { TEST(MustachePartials, PaddingWhitespace) { Value D = Object{{"boolean", true}}; - Template T("|{{> partial }}|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|{{> partial }}|", Ctx); T.registerPartial("partial", "[]"); std::string Out; raw_string_ostream OS(Out); @@ -987,7 +1284,10 @@ TEST(MustachePartials, PaddingWhitespace) { } TEST(MustachePartials, StandaloneIndentation) { - mustache::Template T("\\\n {{>partial}}\n/\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + mustache::Template T("\\\n {{>partial}}\n/\n", Ctx); T.registerPartial("partial", "|\n{{{content}}}\n|\n"); std::string O; raw_string_ostream OS(O); @@ -998,7 +1298,10 @@ TEST(MustachePartials, StandaloneIndentation) { TEST(MustacheLambdas, BasicInterpolation) { Value D = Object{}; - Template T("Hello, {{lambda}}!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello, {{lambda}}!", Ctx); Lambda L = []() -> llvm::json::Value { return "World"; }; T.registerLambda("lambda", L); std::string Out; @@ -1009,7 +1312,10 @@ TEST(MustacheLambdas, BasicInterpolation) { TEST(MustacheLambdas, InterpolationExpansion) { Value D = Object{{"planet", "World"}}; - Template T("Hello, {{lambda}}!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello, {{lambda}}!", Ctx); Lambda L = []() -> llvm::json::Value { return "{{planet}}"; }; T.registerLambda("lambda", L); std::string Out; @@ -1020,7 +1326,10 @@ TEST(MustacheLambdas, InterpolationExpansion) { TEST(MustacheLambdas, BasicMultipleCalls) { Value D = Object{}; - Template T("{{lambda}} == {{lambda}} == {{lambda}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{lambda}} == {{lambda}} == {{lambda}}", Ctx); int I = 0; Lambda L = [&I]() -> llvm::json::Value { I += 1; @@ -1035,7 +1344,10 @@ TEST(MustacheLambdas, BasicMultipleCalls) { TEST(MustacheLambdas, Escaping) { Value D = Object{}; - Template T("<{{lambda}}{{&lambda}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("<{{lambda}}{{&lambda}}", Ctx); Lambda L = []() -> llvm::json::Value { return ">"; }; T.registerLambda("lambda", L); std::string Out; @@ -1046,7 +1358,10 @@ TEST(MustacheLambdas, Escaping) { TEST(MustacheLambdas, Sections) { Value D = Object{}; - Template T("<{{#lambda}}{{x}}{{/lambda}}>"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("<{{#lambda}}{{x}}{{/lambda}}>", Ctx); SectionLambda L = [](StringRef Text) -> llvm::json::Value { if (Text == "{{x}}") { return "yes"; @@ -1064,7 +1379,10 @@ TEST(MustacheLambdas, SectionExpansion) { Value D = Object{ {"planet", "Earth"}, }; - Template T("<{{#lambda}}-{{/lambda}}>"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("<{{#lambda}}-{{/lambda}}>", Ctx); SectionLambda L = [](StringRef Text) -> llvm::json::Value { SmallString<128> Result; Result += Text; @@ -1081,7 +1399,10 @@ TEST(MustacheLambdas, SectionExpansion) { TEST(MustacheLambdas, SectionsMultipleCalls) { Value D = Object{}; - Template T("{{#lambda}}FILE{{/lambda}} != {{#lambda}}LINE{{/lambda}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#lambda}}FILE{{/lambda}} != {{#lambda}}LINE{{/lambda}}", Ctx); SectionLambda L = [](StringRef Text) -> llvm::json::Value { SmallString<128> Result; Result += "__"; @@ -1098,7 +1419,10 @@ TEST(MustacheLambdas, SectionsMultipleCalls) { TEST(MustacheLambdas, InvertedSections) { Value D = Object{{"static", "static"}}; - Template T("<{{^lambda}}{{static}}{{/lambda}}>"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("<{{^lambda}}{{static}}{{/lambda}}>", Ctx); SectionLambda L = [](StringRef Text) -> llvm::json::Value { return false; }; T.registerLambda("lambda", L); std::string Out; @@ -1110,7 +1434,10 @@ TEST(MustacheLambdas, InvertedSections) { TEST(MustacheComments, Inline) { // Comment blocks should be removed from the template. Value D = {}; - Template T("12345{{! Comment Block! }}67890"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("12345{{! Comment Block! }}67890", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1120,7 +1447,10 @@ TEST(MustacheComments, Inline) { TEST(MustacheComments, Multiline) { // Multiline comments should be permitted. Value D = {}; - Template T("12345{{!\n This is a\n multi-line comment...\n}}67890\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("12345{{!\n This is a\n multi-line comment...\n}}67890\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1130,7 +1460,10 @@ TEST(MustacheComments, Multiline) { TEST(MustacheComments, Standalone) { // All standalone comment lines should be removed. Value D = {}; - Template T("Begin.\n{{! Comment Block! }}\nEnd.\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Begin.\n{{! Comment Block! }}\nEnd.\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1140,7 +1473,10 @@ TEST(MustacheComments, Standalone) { TEST(MustacheComments, IndentedStandalone) { // All standalone comment lines should be removed. Value D = {}; - Template T("Begin.\n {{! Indented Comment Block! }}\nEnd.\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Begin.\n {{! Indented Comment Block! }}\nEnd.\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1150,7 +1486,10 @@ TEST(MustacheComments, IndentedStandalone) { TEST(MustacheComments, StandaloneLineEndings) { // "\r\n" should be considered a newline for standalone tags. Value D = {}; - Template T("|\r\n{{! Standalone Comment }}\r\n|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|\r\n{{! Standalone Comment }}\r\n|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1160,7 +1499,10 @@ TEST(MustacheComments, StandaloneLineEndings) { TEST(MustacheComments, StandaloneWithoutPreviousLine) { // Standalone tags should not require a newline to precede them. Value D = {}; - Template T(" {{! I'm Still Standalone }}\n!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{! I'm Still Standalone }}\n!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1170,7 +1512,10 @@ TEST(MustacheComments, StandaloneWithoutPreviousLine) { TEST(MustacheComments, StandaloneWithoutNewline) { // Standalone tags should not require a newline to follow them. Value D = {}; - Template T("!\n {{! I'm Still Standalone }}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("!\n {{! I'm Still Standalone }}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1180,7 +1525,10 @@ TEST(MustacheComments, StandaloneWithoutNewline) { TEST(MustacheComments, MultilineStandalone) { // All standalone comment lines should be removed. Value D = {}; - Template T("Begin.\n{{!\nSomething's going on here...\n}}\nEnd.\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Begin.\n{{!\nSomething's going on here...\n}}\nEnd.\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1190,7 +1538,11 @@ TEST(MustacheComments, MultilineStandalone) { TEST(MustacheComments, IndentedMultilineStandalone) { // All standalone comment lines should be removed. Value D = {}; - Template T("Begin.\n {{!\n Something's going on here...\n }}\nEnd.\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Begin.\n {{!\n Something's going on here...\n }}\nEnd.\n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1200,7 +1552,10 @@ TEST(MustacheComments, IndentedMultilineStandalone) { TEST(MustacheComments, IndentedInline) { // Inline comments should not strip whitespace. Value D = {}; - Template T(" 12 {{! 34 }}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" 12 {{! 34 }}\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1210,7 +1565,10 @@ TEST(MustacheComments, IndentedInline) { TEST(MustacheComments, SurroundingWhitespace) { // Comment removal should preserve surrounding whitespace. Value D = {}; - Template T("12345 {{! Comment Block! }} 67890"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("12345 {{! Comment Block! }} 67890", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1221,7 +1579,10 @@ TEST(MustacheComments, VariableNameCollision) { // Comments must never render, even if a variable with the same name exists. Value D = Object{ {"! comment", 1}, {"! comment ", 2}, {"!comment", 3}, {"comment", 4}}; - Template T("comments never show: >{{! comment }}<"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("comments never show: >{{! comment }}<", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1234,7 +1595,10 @@ TEST(MustacheComments, VariableNameCollision) { // implemented, these assertions should be changed back to EXPECT_EQ. TEST(MustacheTripleMustache, Basic) { Value D = Object{{"subject", "<b>World</b>"}}; - Template T("Hello, {{{subject}}}!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Hello, {{{subject}}}!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1243,7 +1607,10 @@ TEST(MustacheTripleMustache, Basic) { TEST(MustacheTripleMustache, IntegerInterpolation) { Value D = Object{{"mph", 85}}; - Template T("{{{mph}}} miles an hour!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{{mph}}} miles an hour!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1252,7 +1619,10 @@ TEST(MustacheTripleMustache, IntegerInterpolation) { TEST(MustacheTripleMustache, DecimalInterpolation) { Value D = Object{{"power", 1.21}}; - Template T("{{{power}}} jiggawatts!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{{power}}} jiggawatts!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1261,7 +1631,10 @@ TEST(MustacheTripleMustache, DecimalInterpolation) { TEST(MustacheTripleMustache, NullInterpolation) { Value D = Object{{"cannot", nullptr}}; - Template T("I ({{{cannot}}}) be seen!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("I ({{{cannot}}}) be seen!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1270,7 +1643,10 @@ TEST(MustacheTripleMustache, NullInterpolation) { TEST(MustacheTripleMustache, ContextMissInterpolation) { Value D = Object{}; - Template T("I ({{{cannot}}}) be seen!"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("I ({{{cannot}}}) be seen!", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1279,7 +1655,10 @@ TEST(MustacheTripleMustache, ContextMissInterpolation) { TEST(MustacheTripleMustache, DottedNames) { Value D = Object{{"person", Object{{"name", "<b>Joe</b>"}}}}; - Template T("{{{person.name}}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{{person.name}}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1288,7 +1667,10 @@ TEST(MustacheTripleMustache, DottedNames) { TEST(MustacheTripleMustache, ImplicitIterator) { Value D = Object{{"list", Array{"<a>", "<b>"}}}; - Template T("{{#list}}({{{.}}}){{/list}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{#list}}({{{.}}}){{/list}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1297,7 +1679,10 @@ TEST(MustacheTripleMustache, ImplicitIterator) { TEST(MustacheTripleMustache, SurroundingWhitespace) { Value D = Object{{"string", "---"}}; - Template T("| {{{string}}} |"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| {{{string}}} |", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1306,7 +1691,10 @@ TEST(MustacheTripleMustache, SurroundingWhitespace) { TEST(MustacheTripleMustache, Standalone) { Value D = Object{{"string", "---"}}; - Template T(" {{{string}}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{{string}}}\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1315,7 +1703,10 @@ TEST(MustacheTripleMustache, Standalone) { TEST(MustacheTripleMustache, WithPadding) { Value D = Object{{"string", "---"}}; - Template T("|{{{ string }}}|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|{{{ string }}}|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1324,7 +1715,10 @@ TEST(MustacheTripleMustache, WithPadding) { TEST(MustacheDelimiters, PairBehavior) { Value D = Object{{"text", "Hey!"}}; - Template T("{{=<% %>=}}(<%text%>)"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("{{=<% %>=}}(<%text%>)", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1333,7 +1727,10 @@ TEST(MustacheDelimiters, PairBehavior) { TEST(MustacheDelimiters, SpecialCharacters) { Value D = Object{{"text", "It worked!"}}; - Template T("({{=[ ]=}}[text])"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("({{=[ ]=}}[text])", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1342,9 +1739,12 @@ TEST(MustacheDelimiters, SpecialCharacters) { TEST(MustacheDelimiters, Sections) { Value D = Object{{"section", true}, {"data", "I got interpolated."}}; - auto T = - Template("[\n{{#section}}\n {{data}}\n |data|\n{{/section}}\n\n{{= " - "| | =}}\n|#section|\n {{data}}\n |data|\n|/section|\n]\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("[\n{{#section}}\n {{data}}\n |data|\n{{/section}}\n\n{{= " + "| | =}}\n|#section|\n {{data}}\n |data|\n|/section|\n]\n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1355,9 +1755,12 @@ TEST(MustacheDelimiters, Sections) { TEST(MustacheDelimiters, InvertedSections) { Value D = Object{{"section", false}, {"data", "I got interpolated."}}; - auto T = - Template("[\n{{^section}}\n {{data}}\n |data|\n{{/section}}\n\n{{= " - "| | =}}\n|^section|\n {{data}}\n |data|\n|/section|\n]\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("[\n{{^section}}\n {{data}}\n |data|\n{{/section}}\n\n{{= " + "| | =}}\n|^section|\n {{data}}\n |data|\n|/section|\n]\n", + Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1368,7 +1771,10 @@ TEST(MustacheDelimiters, InvertedSections) { TEST(MustacheDelimiters, PartialInheritence) { Value D = Object{{"value", "yes"}}; - Template T("[ {{>include}} ]\n{{= | | =}}\n[ |>include| ]\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("[ {{>include}} ]\n{{= | | =}}\n[ |>include| ]\n", Ctx); T.registerPartial("include", ".{{value}}."); std::string Out; raw_string_ostream OS(Out); @@ -1378,7 +1784,10 @@ TEST(MustacheDelimiters, PartialInheritence) { TEST(MustacheDelimiters, PostPartialBehavior) { Value D = Object{{"value", "yes"}}; - Template T("[ {{>include}} ]\n[ .{{value}}. .|value|. ]\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("[ {{>include}} ]\n[ .{{value}}. .|value|. ]\n", Ctx); T.registerPartial("include", ".{{value}}. {{= | | =}} .|value|."); std::string Out; raw_string_ostream OS(Out); @@ -1388,7 +1797,10 @@ TEST(MustacheDelimiters, PostPartialBehavior) { TEST(MustacheDelimiters, SurroundingWhitespace) { Value D = Object{}; - Template T("| {{=@ @=}} |"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("| {{=@ @=}} |", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1397,7 +1809,10 @@ TEST(MustacheDelimiters, SurroundingWhitespace) { TEST(MustacheDelimiters, OutlyingWhitespaceInline) { Value D = Object{}; - Template T(" | {{=@ @=}}\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" | {{=@ @=}}\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1406,7 +1821,10 @@ TEST(MustacheDelimiters, OutlyingWhitespaceInline) { TEST(MustacheDelimiters, StandaloneTag) { Value D = Object{}; - Template T("Begin.\n{{=@ @=}}\nEnd.\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Begin.\n{{=@ @=}}\nEnd.\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1415,7 +1833,10 @@ TEST(MustacheDelimiters, StandaloneTag) { TEST(MustacheDelimiters, IndentedStandaloneTag) { Value D = Object{}; - Template T("Begin.\n {{=@ @=}}\nEnd.\n"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("Begin.\n {{=@ @=}}\nEnd.\n", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1424,7 +1845,10 @@ TEST(MustacheDelimiters, IndentedStandaloneTag) { TEST(MustacheDelimiters, StandaloneLineEndings) { Value D = Object{}; - Template T("|\r\n{{= @ @ =}}\r\n|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|\r\n{{= @ @ =}}\r\n|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1433,7 +1857,10 @@ TEST(MustacheDelimiters, StandaloneLineEndings) { TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) { Value D = Object{}; - Template T(" {{=@ @=}}\n="); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(" {{=@ @=}}\n=", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1442,7 +1869,10 @@ TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) { TEST(MustacheDelimiters, StandaloneWithoutNewline) { Value D = Object{}; - Template T("=\n {{=@ @=}}"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("=\n {{=@ @=}}", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); @@ -1451,7 +1881,10 @@ TEST(MustacheDelimiters, StandaloneWithoutNewline) { TEST(MustacheDelimiters, PairwithPadding) { Value D = Object{}; - Template T("|{{= @ @ =}}|"); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T("|{{= @ @ =}}|", Ctx); std::string Out; raw_string_ostream OS(Out); T.render(D, OS); diff --git a/llvm/utils/gn/secondary/clang/lib/ASTMatchers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/ASTMatchers/BUILD.gn index 63bf726..8fe30b8 100644 --- a/llvm/utils/gn/secondary/clang/lib/ASTMatchers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/ASTMatchers/BUILD.gn @@ -9,7 +9,6 @@ static_library("ASTMatchers") { sources = [ "ASTMatchFinder.cpp", "ASTMatchersInternal.cpp", - "GtestMatchers.cpp", "LowLevelHelpers.cpp", ] } diff --git a/llvm/utils/gn/secondary/clang/unittests/ASTMatchers/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/ASTMatchers/BUILD.gn index 10f540b..56d3484 100644 --- a/llvm/utils/gn/secondary/clang/unittests/ASTMatchers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/ASTMatchers/BUILD.gn @@ -17,6 +17,5 @@ unittest("ASTMatchersTests") { "ASTMatchersNarrowingTest.cpp", "ASTMatchersNodeTest.cpp", "ASTMatchersTraversalTest.cpp", - "GtestMatchersTest.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn index 92e596e..8d19d30 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn @@ -21,6 +21,7 @@ unittest("ADTTests") { "BitTest.cpp", "BitVectorTest.cpp", "BitmaskEnumTest.cpp", + "BitsetTest.cpp", "BreadthFirstIteratorTest.cpp", "BumpPtrListTest.cpp", "CoalescingBitVectorTest.cpp", diff --git a/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp b/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp index 9007eb3..93e2efe 100644 --- a/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp +++ b/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp @@ -212,7 +212,10 @@ static void runTest(StringRef InputFile) { for (Value V : *TestArray) { auto TestData = ExitOnErr(TestData::createTestData(V.getAsObject(), InputFile)); - Template T(TestData.TemplateStr); + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + MustacheContext Ctx(Allocator, Saver); + Template T(TestData.TemplateStr, Ctx); registerPartials(TestData.Partials, T); std::string ActualStr; |