Diffstat (limited to 'llvm/lib/CodeGen')
35 files changed, 822 insertions, 287 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d34fe0e..3ba4590 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -503,13 +503,7 @@ bool AsmPrinter::doInitialization(Module &M) { // don't, this at least helps the user find where a global came from. if (MAI->hasSingleParameterDotFile()) { // .file "foo.c" - - SmallString<128> FileName; - if (MAI->hasBasenameOnlyForFileDirective()) - FileName = llvm::sys::path::filename(M.getSourceFileName()); - else - FileName = M.getSourceFileName(); - if (MAI->hasFourStringsDotFile()) { + if (MAI->isAIX()) { const char VerStr[] = #ifdef PACKAGE_VENDOR PACKAGE_VENDOR " " @@ -520,9 +514,10 @@ bool AsmPrinter::doInitialization(Module &M) { #endif ; // TODO: Add timestamp and description. - OutStreamer->emitFileDirective(FileName, VerStr, "", ""); + OutStreamer->emitFileDirective(M.getSourceFileName(), VerStr, "", ""); } else { - OutStreamer->emitFileDirective(FileName); + OutStreamer->emitFileDirective( + llvm::sys::path::filename(M.getSourceFileName())); } } @@ -967,11 +962,10 @@ void AsmPrinter::emitFunctionHeader() { MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM)); OutStreamer->switchSection(MF->getSection()); - if (!MAI->hasVisibilityOnlyWithLinkage()) - emitVisibility(CurrentFnSym, F.getVisibility()); - - if (MAI->needsFunctionDescriptors()) + if (MAI->isAIX()) emitLinkage(&F, CurrentFnDescSym); + else + emitVisibility(CurrentFnSym, F.getVisibility()); emitLinkage(&F, CurrentFnSym); if (MAI->hasFunctionAlignment()) @@ -1031,7 +1025,7 @@ void AsmPrinter::emitFunctionHeader() { // to emit their specific function descriptor. Right now it is only used by // the AIX target. The PowerPC 64-bit V1 ELF target also uses function // descriptors and should be converted to use this hook as well. - if (MAI->needsFunctionDescriptors()) + if (MAI->isAIX()) emitFunctionDescriptor(); // Emit the CurrentFnSym. This is a virtual function to allow targets to do @@ -2234,9 +2228,6 @@ void AsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) { // point, all the extra label is emitted, we just have to emit linkage for // those labels. if (TM.getTargetTriple().isOSBinFormatXCOFF()) { - assert(MAI->hasVisibilityOnlyWithLinkage() && - "Visibility should be handled with emitLinkage() on AIX."); - // Linkage for alias of global variable has been emitted. if (isa<GlobalVariable>(GA.getAliaseeObject())) return; @@ -2730,7 +2721,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { HasNoSplitStack = true; // Get the function symbol. 
- if (!MAI->needsFunctionDescriptors()) { + if (!MAI->isAIX()) { CurrentFnSym = getSymbol(&MF.getFunction()); } else { assert(TM.getTargetTriple().isOSAIX() && @@ -3923,21 +3914,22 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, if (isa<ConstantAggregateZero>(CV)) { StructType *structType; if (AliasList && (structType = llvm::dyn_cast<StructType>(CV->getType()))) { - // Handle cases of aliases to direct struct elements - const StructLayout *Layout = DL.getStructLayout(structType); - uint64_t SizeSoFar = 0; - for (unsigned int i = 0, n = structType->getNumElements(); i < n - 1; - ++i) { - uint64_t GapToNext = Layout->getElementOffset(i + 1) - SizeSoFar; - AP.OutStreamer->emitZeros(GapToNext); - SizeSoFar += GapToNext; - emitGlobalAliasInline(AP, Offset + SizeSoFar, AliasList); + unsigned numElements = {structType->getNumElements()}; + if (numElements != 0) { + // Handle cases of aliases to direct struct elements + const StructLayout *Layout = DL.getStructLayout(structType); + uint64_t SizeSoFar = 0; + for (unsigned int i = 0; i < numElements - 1; ++i) { + uint64_t GapToNext = Layout->getElementOffset(i + 1) - SizeSoFar; + AP.OutStreamer->emitZeros(GapToNext); + SizeSoFar += GapToNext; + emitGlobalAliasInline(AP, Offset + SizeSoFar, AliasList); + } + AP.OutStreamer->emitZeros(Size - SizeSoFar); + return; } - AP.OutStreamer->emitZeros(Size - SizeSoFar); - return; - } else { - return AP.OutStreamer->emitZeros(Size); } + return AP.OutStreamer->emitZeros(Size); } if (isa<UndefValue>(CV)) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index ebae27e..59fc4cf 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -153,7 +153,7 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI, AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant(); // FIXME: Should this happen for `asm inteldialect` as well? - if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker()) + if (!InputIsIntelDialect && !MAI->isHLASM()) OS << '\t'; while (*LastEmitted) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e1291e2..11de4b6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3789,6 +3789,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // they depend on addresses, throwing them out and rebuilding them. setCurrentDWARF5AccelTable(DWARF5AccelTableKind::CU); CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy)); + CU.updateAcceleratorTables(CTy->getScope(), CTy, RefDie); return; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 0225654..1632053 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -315,6 +315,11 @@ public: /// Get context owner's DIE. DIE *createTypeDIE(const DICompositeType *Ty); + /// If this is a named finished type then include it in the list of types for + /// the accelerator tables. + void updateAcceleratorTables(const DIScope *Context, const DIType *Ty, + const DIE &TyDIE); + protected: ~DwarfUnit(); @@ -357,11 +362,6 @@ private: virtual void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) = 0; - /// If this is a named finished type then include it in the list of types for - /// the accelerator tables. 
- void updateAcceleratorTables(const DIScope *Context, const DIType *Ty, - const DIE &TyDIE); - virtual bool isDwoUnit() const = 0; const MCSymbol *getCrossSectionRelativeBaseAddress() const override; diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 5c712e4..ba1b10e 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -152,7 +152,7 @@ static cl::opt<bool> static cl::opt<bool> EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), - cl::desc("Enable sinkinig and/cmp into branches.")); + cl::desc("Enable sinking and/cmp into branches.")); static cl::opt<bool> DisableStoreExtract( "disable-cgp-store-extract", cl::Hidden, cl::init(false), diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index f3f7ea9..aec8df9 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -108,6 +108,13 @@ static bool isNeg(Value *V); static Value *getNegOperand(Value *V); namespace { +template <typename T, typename IterT> +std::optional<T> findCommonBetweenCollections(IterT A, IterT B) { + auto Common = llvm::find_if(A, [B](T I) { return llvm::is_contained(B, I); }); + if (Common != A.end()) + return std::make_optional(*Common); + return std::nullopt; +} class ComplexDeinterleavingLegacyPass : public FunctionPass { public: @@ -144,6 +151,7 @@ private: friend class ComplexDeinterleavingGraph; using NodePtr = std::shared_ptr<ComplexDeinterleavingCompositeNode>; using RawNodePtr = ComplexDeinterleavingCompositeNode *; + bool OperandsValid = true; public: ComplexDeinterleavingOperation Operation; @@ -160,7 +168,11 @@ public: SmallVector<RawNodePtr> Operands; Value *ReplacementNode = nullptr; - void addOperand(NodePtr Node) { Operands.push_back(Node.get()); } + void addOperand(NodePtr Node) { + if (!Node || !Node.get()) + OperandsValid = false; + Operands.push_back(Node.get()); + } void dump() { dump(dbgs()); } void dump(raw_ostream &OS) { @@ -194,6 +206,8 @@ public: PrintNodeRef(Op); } } + + bool areOperandsValid() { return OperandsValid; } }; class ComplexDeinterleavingGraph { @@ -293,7 +307,7 @@ private: NodePtr submitCompositeNode(NodePtr Node) { CompositeNodes.push_back(Node); - if (Node->Real && Node->Imag) + if (Node->Real) CachedResult[{Node->Real, Node->Imag}] = Node; return Node; } @@ -327,6 +341,8 @@ private: /// i: ai - br NodePtr identifyAdd(Instruction *Real, Instruction *Imag); NodePtr identifySymmetricOperation(Instruction *Real, Instruction *Imag); + NodePtr identifyPartialReduction(Value *R, Value *I); + NodePtr identifyDotProduct(Value *Inst); NodePtr identifyNode(Value *R, Value *I); @@ -396,6 +412,7 @@ private: /// * Deinterleave the final value outside of the loop and repurpose original /// reduction users void processReductionOperation(Value *OperationReplacement, RawNodePtr Node); + void processReductionSingle(Value *OperationReplacement, RawNodePtr Node); public: void dump() { dump(dbgs()); } @@ -891,17 +908,163 @@ ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real, } ComplexDeinterleavingGraph::NodePtr -ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) { - LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n"); - assert(R->getType() == I->getType() && - "Real and imaginary parts should not have different types"); +ComplexDeinterleavingGraph::identifyDotProduct(Value *V) { + + if (!TL->isComplexDeinterleavingOperationSupported( + 
ComplexDeinterleavingOperation::CDot, V->getType())) { + LLVM_DEBUG(dbgs() << "Target doesn't support complex deinterleaving " + "operation CDot with the type " + << *V->getType() << "\n"); + return nullptr; + } + + auto *Inst = cast<Instruction>(V); + auto *RealUser = cast<Instruction>(*Inst->user_begin()); + + NodePtr CN = + prepareCompositeNode(ComplexDeinterleavingOperation::CDot, Inst, nullptr); + + NodePtr ANode; + + const Intrinsic::ID PartialReduceInt = + Intrinsic::experimental_vector_partial_reduce_add; + + Value *AReal = nullptr; + Value *AImag = nullptr; + Value *BReal = nullptr; + Value *BImag = nullptr; + Value *Phi = nullptr; + + auto UnwrapCast = [](Value *V) -> Value * { + if (auto *CI = dyn_cast<CastInst>(V)) + return CI->getOperand(0); + return V; + }; + + auto PatternRot0 = m_Intrinsic<PartialReduceInt>( + m_Intrinsic<PartialReduceInt>(m_Value(Phi), + m_Mul(m_Value(BReal), m_Value(AReal))), + m_Neg(m_Mul(m_Value(BImag), m_Value(AImag)))); + + auto PatternRot270 = m_Intrinsic<PartialReduceInt>( + m_Intrinsic<PartialReduceInt>( + m_Value(Phi), m_Neg(m_Mul(m_Value(BReal), m_Value(AImag)))), + m_Mul(m_Value(BImag), m_Value(AReal))); + + if (match(Inst, PatternRot0)) { + CN->Rotation = ComplexDeinterleavingRotation::Rotation_0; + } else if (match(Inst, PatternRot270)) { + CN->Rotation = ComplexDeinterleavingRotation::Rotation_270; + } else { + Value *A0, *A1; + // The rotations 90 and 180 share the same operation pattern, so inspect the + // order of the operands, identifying where the real and imaginary + // components of A go, to discern between the aforementioned rotations. + auto PatternRot90Rot180 = m_Intrinsic<PartialReduceInt>( + m_Intrinsic<PartialReduceInt>(m_Value(Phi), + m_Mul(m_Value(BReal), m_Value(A0))), + m_Mul(m_Value(BImag), m_Value(A1))); + + if (!match(Inst, PatternRot90Rot180)) + return nullptr; + + A0 = UnwrapCast(A0); + A1 = UnwrapCast(A1); + + // Test if A0 is real/A1 is imag + ANode = identifyNode(A0, A1); + if (!ANode) { + // Test if A0 is imag/A1 is real + ANode = identifyNode(A1, A0); + // Unable to identify operand components, thus unable to identify rotation + if (!ANode) + return nullptr; + CN->Rotation = ComplexDeinterleavingRotation::Rotation_90; + AReal = A1; + AImag = A0; + } else { + AReal = A0; + AImag = A1; + CN->Rotation = ComplexDeinterleavingRotation::Rotation_180; + } + } + + AReal = UnwrapCast(AReal); + AImag = UnwrapCast(AImag); + BReal = UnwrapCast(BReal); + BImag = UnwrapCast(BImag); + + VectorType *VTy = cast<VectorType>(V->getType()); + Type *ExpectedOperandTy = VectorType::getSubdividedVectorType(VTy, 2); + if (AReal->getType() != ExpectedOperandTy) + return nullptr; + if (AImag->getType() != ExpectedOperandTy) + return nullptr; + if (BReal->getType() != ExpectedOperandTy) + return nullptr; + if (BImag->getType() != ExpectedOperandTy) + return nullptr; + + if (Phi->getType() != VTy && RealUser->getType() != VTy) + return nullptr; + + NodePtr Node = identifyNode(AReal, AImag); + + // In the case that a node was identified to figure out the rotation, ensure + // that trying to identify a node with AReal and AImag post-unwrap results in + // the same node + if (ANode && Node != ANode) { + LLVM_DEBUG( + dbgs() + << "Identified node is different from previously identified node. 
" + "Unable to confidently generate a complex operation node\n"); + return nullptr; + } + + CN->addOperand(Node); + CN->addOperand(identifyNode(BReal, BImag)); + CN->addOperand(identifyNode(Phi, RealUser)); + + return submitCompositeNode(CN); +} + +ComplexDeinterleavingGraph::NodePtr +ComplexDeinterleavingGraph::identifyPartialReduction(Value *R, Value *I) { + // Partial reductions don't support non-vector types, so check these first + if (!isa<VectorType>(R->getType()) || !isa<VectorType>(I->getType())) + return nullptr; + + auto CommonUser = + findCommonBetweenCollections<Value *>(R->users(), I->users()); + if (!CommonUser) + return nullptr; + + auto *IInst = dyn_cast<IntrinsicInst>(*CommonUser); + if (!IInst || IInst->getIntrinsicID() != + Intrinsic::experimental_vector_partial_reduce_add) + return nullptr; + + if (NodePtr CN = identifyDotProduct(IInst)) + return CN; + + return nullptr; +} +ComplexDeinterleavingGraph::NodePtr +ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) { auto It = CachedResult.find({R, I}); if (It != CachedResult.end()) { LLVM_DEBUG(dbgs() << " - Folding to existing node\n"); return It->second; } + if (NodePtr CN = identifyPartialReduction(R, I)) + return CN; + + bool IsReduction = RealPHI == R && (!ImagPHI || ImagPHI == I); + if (!IsReduction && R->getType() != I->getType()) + return nullptr; + if (NodePtr CN = identifySplat(R, I)) return CN; @@ -1427,12 +1590,20 @@ bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) { if (It != RootToNode.end()) { auto RootNode = It->second; assert(RootNode->Operation == - ComplexDeinterleavingOperation::ReductionOperation); + ComplexDeinterleavingOperation::ReductionOperation || + RootNode->Operation == + ComplexDeinterleavingOperation::ReductionSingle); // Find out which part, Real or Imag, comes later, and only if we come to // the latest part, add it to OrderedRoots. auto *R = cast<Instruction>(RootNode->Real); - auto *I = cast<Instruction>(RootNode->Imag); - auto *ReplacementAnchor = R->comesBefore(I) ? I : R; + auto *I = RootNode->Imag ? cast<Instruction>(RootNode->Imag) : nullptr; + + Instruction *ReplacementAnchor; + if (I) + ReplacementAnchor = R->comesBefore(I) ? I : R; + else + ReplacementAnchor = R; + if (ReplacementAnchor != RootI) return false; OrderedRoots.push_back(RootI); @@ -1523,7 +1694,6 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() { for (size_t j = i + 1; j < OperationInstruction.size(); ++j) { if (Processed[j]) continue; - auto *Real = OperationInstruction[i]; auto *Imag = OperationInstruction[j]; if (Real->getType() != Imag->getType()) @@ -1556,6 +1726,28 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() { break; } } + + auto *Real = OperationInstruction[i]; + // We want to check that we have 2 operands, but the function attributes + // being counted as operands bloats this value. 
+ if (Real->getNumOperands() < 2) + continue; + + RealPHI = ReductionInfo[Real].first; + ImagPHI = nullptr; + PHIsFound = false; + auto Node = identifyNode(Real->getOperand(0), Real->getOperand(1)); + if (Node && PHIsFound) { + LLVM_DEBUG( + dbgs() << "Identified single reduction starting from instruction: " + << *Real << "/" << *ReductionInfo[Real].second << "\n"); + Processed[i] = true; + auto RootNode = prepareCompositeNode( + ComplexDeinterleavingOperation::ReductionSingle, Real, nullptr); + RootNode->addOperand(Node); + RootToNode[Real] = RootNode; + submitCompositeNode(RootNode); + } } RealPHI = nullptr; @@ -1563,6 +1755,24 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() { } bool ComplexDeinterleavingGraph::checkNodes() { + + bool FoundDeinterleaveNode = false; + for (NodePtr N : CompositeNodes) { + if (!N->areOperandsValid()) + return false; + if (N->Operation == ComplexDeinterleavingOperation::Deinterleave) + FoundDeinterleaveNode = true; + } + + // We need a deinterleave node in order to guarantee that we're working with + // complex numbers. + if (!FoundDeinterleaveNode) { + LLVM_DEBUG( + dbgs() << "Couldn't find a deinterleave node within the graph, cannot " + "guarantee safety during graph transformation.\n"); + return false; + } + // Collect all instructions from roots to leaves SmallPtrSet<Instruction *, 16> AllInstructions; SmallVector<Instruction *, 8> Worklist; @@ -1831,7 +2041,7 @@ ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) { ComplexDeinterleavingGraph::NodePtr ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real, Instruction *Imag) { - if (Real != RealPHI || Imag != ImagPHI) + if (Real != RealPHI || (ImagPHI && Imag != ImagPHI)) return nullptr; PHIsFound = true; @@ -1926,6 +2136,16 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder, Value *ReplacementNode; switch (Node->Operation) { + case ComplexDeinterleavingOperation::CDot: { + Value *Input0 = ReplaceOperandIfExist(Node, 0); + Value *Input1 = ReplaceOperandIfExist(Node, 1); + Value *Accumulator = ReplaceOperandIfExist(Node, 2); + assert(!Input1 || (Input0->getType() == Input1->getType() && + "Node inputs need to be of the same type")); + ReplacementNode = TL->createComplexDeinterleavingIR( + Builder, Node->Operation, Node->Rotation, Input0, Input1, Accumulator); + break; + } case ComplexDeinterleavingOperation::CAdd: case ComplexDeinterleavingOperation::CMulPartial: case ComplexDeinterleavingOperation::Symmetric: { @@ -1969,13 +2189,18 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder, case ComplexDeinterleavingOperation::ReductionPHI: { // If Operation is ReductionPHI, a new empty PHINode is created. // It is filled later when the ReductionOperation is processed. 
+ auto *OldPHI = cast<PHINode>(Node->Real); auto *VTy = cast<VectorType>(Node->Real->getType()); auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy); auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHIIt()); - OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI; + OldToNewPHI[OldPHI] = NewPHI; ReplacementNode = NewPHI; break; } + case ComplexDeinterleavingOperation::ReductionSingle: + ReplacementNode = replaceNode(Builder, Node->Operands[0]); + processReductionSingle(ReplacementNode, Node); + break; case ComplexDeinterleavingOperation::ReductionOperation: ReplacementNode = replaceNode(Builder, Node->Operands[0]); processReductionOperation(ReplacementNode, Node); @@ -2000,6 +2225,38 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder, return ReplacementNode; } +void ComplexDeinterleavingGraph::processReductionSingle( + Value *OperationReplacement, RawNodePtr Node) { + auto *Real = cast<Instruction>(Node->Real); + auto *OldPHI = ReductionInfo[Real].first; + auto *NewPHI = OldToNewPHI[OldPHI]; + auto *VTy = cast<VectorType>(Real->getType()); + auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy); + + Value *Init = OldPHI->getIncomingValueForBlock(Incoming); + + IRBuilder<> Builder(Incoming->getTerminator()); + + Value *NewInit = nullptr; + if (auto *C = dyn_cast<Constant>(Init)) { + if (C->isZeroValue()) + NewInit = Constant::getNullValue(NewVTy); + } + + if (!NewInit) + NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy, + {Init, Constant::getNullValue(VTy)}); + + NewPHI->addIncoming(NewInit, Incoming); + NewPHI->addIncoming(OperationReplacement, BackEdge); + + auto *FinalReduction = ReductionInfo[Real].second; + Builder.SetInsertPoint(&*FinalReduction->getParent()->getFirstInsertionPt()); + + auto *AddReduce = Builder.CreateAddReduce(OperationReplacement); + FinalReduction->replaceAllUsesWith(AddReduce); +} + void ComplexDeinterleavingGraph::processReductionOperation( Value *OperationReplacement, RawNodePtr Node) { auto *Real = cast<Instruction>(Node->Real); @@ -2059,8 +2316,13 @@ void ComplexDeinterleavingGraph::replaceNodes() { auto *RootImag = cast<Instruction>(RootNode->Imag); ReductionInfo[RootReal].first->removeIncomingValue(BackEdge); ReductionInfo[RootImag].first->removeIncomingValue(BackEdge); - DeadInstrRoots.push_back(cast<Instruction>(RootReal)); - DeadInstrRoots.push_back(cast<Instruction>(RootImag)); + DeadInstrRoots.push_back(RootReal); + DeadInstrRoots.push_back(RootImag); + } else if (RootNode->Operation == + ComplexDeinterleavingOperation::ReductionSingle) { + auto *RootInst = cast<Instruction>(RootNode->Real); + ReductionInfo[RootInst].first->removeIncomingValue(BackEdge); + DeadInstrRoots.push_back(ReductionInfo[RootInst].second); } else { assert(R && "Unable to find replacement for RootInstruction"); DeadInstrRoots.push_back(RootInstruction); diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index f8ca7e3..74f93e1 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -669,17 +669,25 @@ Value *MemCmpExpansion::getMemCmpOneBlock() { if (CI->hasOneUser()) { auto *UI = cast<Instruction>(*CI->user_begin()); CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE; - uint64_t Shift; bool NeedsZExt = false; // This is a special case because instead of checking if the result is less // than zero: // bool result = memcmp(a, b, NBYTES) < 0; // Compiler is clever enough to generate the following code: // bool result = memcmp(a, b, 
NBYTES) >> 31; - if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) && - Shift == (CI->getType()->getIntegerBitWidth() - 1)) { + if (match(UI, + m_LShr(m_Value(), + m_SpecificInt(CI->getType()->getIntegerBitWidth() - 1)))) { Pred = ICmpInst::ICMP_SLT; NeedsZExt = true; + } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(CI), + m_AllOnes()))) { + // Adjust predicate as if it compared with 0. + Pred = ICmpInst::ICMP_SGE; + } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(CI), + m_One()))) { + // Adjust predicate as if it compared with 0. + Pred = ICmpInst::ICMP_SLE; } else { // In case of a successful match this call will set `Pred` variable match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero())); @@ -696,17 +704,9 @@ Value *MemCmpExpansion::getMemCmpOneBlock() { } } - // The result of memcmp is negative, zero, or positive, so produce that by - // subtracting 2 extended compare bits: sub (ugt, ult). - // If a target prefers to use selects to get -1/0/1, they should be able - // to transform this later. The inverse transform (going from selects to math) - // may not be possible in the DAG because the selects got converted into - // branches before we got there. - Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs); - Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs); - Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); - Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); - return Builder.CreateSub(ZextUGT, ZextULT); + // The result of memcmp is negative, zero, or positive. + return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::ucmp, + {Loads.Lhs, Loads.Rhs}); } // This function expands the memcmp call into an inline expansion and returns diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index c20e9d0..4e3aaf5d 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6864,6 +6864,23 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, }; return true; } + + // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2) + if (FalseValue.isPowerOf2() && TrueValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Not = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Not, Cond); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Inner, Not); + // The shift amount must be scalar. + LLT ShiftTy = TrueTy.isVector() ? 
TrueTy.getElementType() : TrueTy; + auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2()); + B.buildShl(Dest, Inner, ShAmtC, Flags); + }; + return true; + } + // select Cond, -1, C --> or (sext Cond), C if (TrueValue.isAllOnes()) { MatchInfo = [=](MachineIRBuilder &B) { @@ -7045,6 +7062,34 @@ bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO, } } +// (neg (min/max x, (neg x))) --> (max/min x, (neg x)) +bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI, + BuildFnTy &MatchInfo) const { + assert(MI.getOpcode() == TargetOpcode::G_SUB); + Register DestReg = MI.getOperand(0).getReg(); + LLT DestTy = MRI.getType(DestReg); + + Register X; + Register Sub0; + auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0)); + if (mi_match(DestReg, MRI, + m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern), + m_GSMax(m_Reg(X), NegPattern), + m_GUMin(m_Reg(X), NegPattern), + m_GUMax(m_Reg(X), NegPattern)))))) { + MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode()); + if (isLegal({NewOpc, {DestTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildInstr(NewOpc, {DestReg}, {X, Sub0}); + }; + return true; + } + } + + return false; +} + bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const { GSelect *Select = cast<GSelect>(&MI); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e2247f7..d0a6234 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelTypeUtils.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -3022,8 +3023,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return UnableToLegalize; LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (!Ty.isScalar()) - return UnableToLegalize; + assert(!Ty.isPointerOrPointerVector() && "Can't widen type"); + if (!Ty.isScalar()) { + // We need to widen the vector element type. + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT); + // We also need to adjust the MMO to turn this into a truncating store. + MachineMemOperand &MMO = **MI.memoperands_begin(); + MachineFunction &MF = MIRBuilder.getMF(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty); + MI.setMemRefs(MF, {NewMMO}); + Observer.changedInstr(MI); + return Legalized; + } Observer.changingInstr(MI); @@ -4106,10 +4118,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { unsigned StoreWidth = MemTy.getSizeInBits(); unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes(); - if (StoreWidth != StoreSizeInBits) { - if (SrcTy.isVector()) - return UnableToLegalize; - + if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. 
For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) @@ -4131,9 +4140,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { } if (MemTy.isVector()) { - // TODO: Handle vector trunc stores if (MemTy != SrcTy) - return UnableToLegalize; + return scalarizeVectorBooleanStore(StoreMI); // TODO: We can do better than scalarizing the vector and at least split it // in half. @@ -4189,6 +4197,50 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { } LegalizerHelper::LegalizeResult +LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) { + Register SrcReg = StoreMI.getValueReg(); + Register PtrReg = StoreMI.getPointerReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **StoreMI.memoperands_begin(); + LLT MemTy = MMO.getMemoryType(); + LLT MemScalarTy = MemTy.getElementType(); + MachineFunction &MF = MIRBuilder.getMF(); + + assert(SrcTy.isVector() && "Expect a vector store type"); + + if (!MemScalarTy.isByteSized()) { + // We need to build an integer scalar of the vector bit pattern. + // It's not legal for us to add padding when storing a vector. + unsigned NumBits = MemTy.getSizeInBits(); + LLT IntTy = LLT::scalar(NumBits); + auto CurrVal = MIRBuilder.buildConstant(IntTy, 0); + LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout())); + + for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) { + auto Elt = MIRBuilder.buildExtractVectorElement( + SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I)); + auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt); + auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc); + unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian() + ? (MemTy.getNumElements() - 1) - I + : I; + auto ShiftAmt = MIRBuilder.buildConstant( + IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits()); + auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt); + CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted); + } + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy); + MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO); + StoreMI.eraseFromParent(); + return Legalized; + } + + // TODO: implement simple scalarization. 
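A minimal standalone sketch of the packing scheme used above, in plain C++ rather than the GlobalISel API (function and parameter names are illustrative, not from the patch; assumes 1-bit elements and at most 64 of them):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Mirror of the shift-into-lane loop in scalarizeVectorBooleanStore:
    // each element is zero-extended and OR'd into its lane, where the lane
    // index is I on little-endian targets and (N - 1) - I on big-endian.
    uint64_t packBooleanVector(const std::vector<bool> &Elts, bool BigEndian) {
      uint64_t Packed = 0; // corresponds to buildConstant(IntTy, 0)
      const std::size_t N = Elts.size();
      for (std::size_t I = 0; I != N; ++I) {
        std::size_t ShiftIntoIdx = BigEndian ? (N - 1) - I : I;
        Packed |= uint64_t(Elts[I] ? 1 : 0) << ShiftIntoIdx; // zext, shl, or
      }
      return Packed;
    }

Storing <4 x i1> (1,0,1,1) this way produces 0b1101 on a little-endian target and 0b1011 on a big-endian one.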
+ return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: { @@ -4653,6 +4705,20 @@ LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment, return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); } +MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res, + const SrcOp &Val) { + LLT SrcTy = Val.getLLTTy(MRI); + Align StackTypeAlign = + std::max(getStackTemporaryAlignment(SrcTy), + getStackTemporaryAlignment(Res.getLLTTy(MRI))); + MachinePointerInfo PtrInfo; + auto StackTemp = + createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo); + + MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign); + return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign); +} + static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy) { LLT IdxTy = B.getMRI()->getType(IdxReg); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 8c1e41e..625d556 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -276,6 +276,21 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } +unsigned llvm::getInverseGMinMaxOpcode(unsigned MinMaxOpc) { + switch (MinMaxOpc) { + case TargetOpcode::G_SMIN: + return TargetOpcode::G_SMAX; + case TargetOpcode::G_SMAX: + return TargetOpcode::G_SMIN; + case TargetOpcode::G_UMIN: + return TargetOpcode::G_UMAX; + case TargetOpcode::G_UMAX: + return TargetOpcode::G_UMIN; + default: + llvm_unreachable("unrecognized opcode"); + } +} + std::optional<APInt> llvm::getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI) { std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough( @@ -1517,6 +1532,18 @@ llvm::isConstantOrConstantSplatVector(MachineInstr &MI, return APInt(ScalarSize, *MaybeCst, true); } +std::optional<APFloat> +llvm::isConstantOrConstantSplatVectorFP(MachineInstr &MI, + const MachineRegisterInfo &MRI) { + Register Def = MI.getOperand(0).getReg(); + if (auto FpConst = getFConstantVRegValWithLookThrough(Def, MRI)) + return FpConst->Value; + auto MaybeCstFP = getFConstantSplat(Def, MRI, /*allowUndef=*/false); + if (!MaybeCstFP) + return std::nullopt; + return MaybeCstFP->Value; +} + bool llvm::isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs) { switch (MI.getOpcode()) { diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index 1187ad0..e920b1b 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -60,11 +60,17 @@ static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) { if (Name.starts_with("__dtrace")) return false; } - if (isCalleeOperand(CI, OpIdx) && - CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) { + if (isCalleeOperand(CI, OpIdx)) { // The operand is the callee and it has already been signed. Ignore this // because we cannot add another ptrauth bundle to the call instruction. - return false; + if (CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) + return false; + } else { + // The target of the arc-attached call must be a constant and cannot be + // parameterized. 
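The matchSimplifyNegMinMax combine shown earlier, paired with getInverseGMinMaxOpcode above, rests on the identity -min(x, -x) == max(x, -x) and its min/max-swapped dual. A quick standalone check of the signed case (avoiding INT_MIN, where negation overflows):

    #include <algorithm>
    #include <cassert>

    int main() {
      for (int X = -1000; X <= 1000; ++X) {
        assert(-std::min(X, -X) == std::max(X, -X)); // neg of min is max
        assert(-std::max(X, -X) == std::min(X, -X)); // neg of max is min
      }
      return 0;
    }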
+ if (CI->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall, + OpIdx)) + return false; } return true; } diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index 9744c47..3367171 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -66,7 +66,7 @@ void LiveRegMatrix::init(MachineFunction &MF, LiveIntervals &pLIS, unsigned NumRegUnits = TRI->getNumRegUnits(); if (NumRegUnits != Matrix.size()) Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]); - Matrix.init(LIUAlloc, NumRegUnits); + Matrix.init(*LIUAlloc, NumRegUnits); // Make sure no stale queries get reused. invalidateVirtRegs(); diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp index 23db09b..9bba50e8 100644 --- a/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -46,8 +46,9 @@ static cl::opt<bool> ShowFSBranchProb( cl::desc("Print setting flow sensitive branch probabilities")); static cl::opt<unsigned> FSProfileDebugProbDiffThreshold( "fs-profile-debug-prob-diff-threshold", cl::init(10), - cl::desc("Only show debug message if the branch probility is greater than " - "this value (in percentage).")); + cl::desc( + "Only show debug message if the branch probability is greater than " + "this value (in percentage).")); static cl::opt<unsigned> FSProfileDebugBWThreshold( "fs-profile-debug-bw-threshold", cl::init(10000), diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 0f68313..05bc4cf 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -149,7 +149,7 @@ static cl::opt<unsigned> JumpInstCost("jump-inst-cost", static cl::opt<bool> TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. 
" - "Creates more fallthrough opportunites in " + "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden); diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 56fffff..2e92dd8 100644 --- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -29,7 +29,7 @@ namespace llvm { cl::opt<unsigned> StaticLikelyProb("static-likely-prob", cl::desc("branch probability threshold in percentage" - "to be considered very likely"), + " to be considered very likely"), cl::init(80), cl::Hidden); cl::opt<unsigned> ProfileLikelyProb( diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 3a9bdde..5c9ca91 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1170,6 +1170,9 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (getFlags() & MachineMemOperand::MOTargetFlag3) OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3) << "\" "; + if (getFlags() & MachineMemOperand::MOTargetFlag4) + OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag4) + << "\" "; } else { if (getFlags() & MachineMemOperand::MOTargetFlag1) OS << "\"MOTargetFlag1\" "; @@ -1177,6 +1180,8 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "\"MOTargetFlag2\" "; if (getFlags() & MachineMemOperand::MOTargetFlag3) OS << "\"MOTargetFlag3\" "; + if (getFlags() & MachineMemOperand::MOTargetFlag4) + OS << "\"MOTargetFlag4\" "; } assert((isLoad() || isStore()) && diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 6f636a1..394b99b 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -407,9 +407,11 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) { MachineInstr *MachineRegisterInfo::getVRegDef(Register Reg) const { // Since we are in SSA form, we can use the first definition. def_instr_iterator I = def_instr_begin(Reg); - assert((I.atEnd() || std::next(I) == def_instr_end()) && - "getVRegDef assumes a single definition or no definition"); - return !I.atEnd() ? 
&*I : nullptr; + if (I == def_instr_end()) + return nullptr; + assert(std::next(I) == def_instr_end() && + "getVRegDef assumes at most one definition"); + return &*I; } /// getUniqueVRegDef - Return the unique machine instr that defines the diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 6576f97..021c1a0 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -683,11 +683,10 @@ struct DataDep { DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) : UseOp(UseOp) { assert(Register::isVirtualRegister(VirtReg)); - MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); - assert(!DefI.atEnd() && "Register has no defs"); - DefMI = DefI->getParent(); - DefOp = DefI.getOperandNo(); - assert((++DefI).atEnd() && "Register has multiple defs"); + MachineOperand *DefMO = MRI->getOneDef(VirtReg); + assert(DefMO && "Register does not have unique def"); + DefMI = DefMO->getParent(); + DefOp = DefMO->getOperandNo(); } }; diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp index 2f7cfdd..badfd9a6 100644 --- a/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -98,12 +98,6 @@ namespace { } bool runOnMachineFunction(MachineFunction &Fn) override; - - private: - bool enablePostRAScheduler( - const TargetSubtargetInfo &ST, CodeGenOptLevel OptLevel, - TargetSubtargetInfo::AntiDepBreakMode &Mode, - TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const; }; char PostRAScheduler::ID = 0; @@ -259,13 +253,8 @@ LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const { } #endif -bool PostRAScheduler::enablePostRAScheduler( - const TargetSubtargetInfo &ST, CodeGenOptLevel OptLevel, - TargetSubtargetInfo::AntiDepBreakMode &Mode, - TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const { - Mode = ST.getAntiDepBreakMode(); - ST.getCriticalPathRCs(CriticalPathRCs); - +static bool enablePostRAScheduler(const TargetSubtargetInfo &ST, + CodeGenOptLevel OptLevel) { // Check for explicit enable/disable of post-ra scheduling. if (EnablePostRAScheduler.getPosition() > 0) return EnablePostRAScheduler; @@ -278,24 +267,17 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(Fn.getFunction())) return false; - TII = Fn.getSubtarget().getInstrInfo(); - MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); - AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + const auto &Subtarget = Fn.getSubtarget(); TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); - - RegClassInfo.runOnMachineFunction(Fn); - - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = - TargetSubtargetInfo::ANTIDEP_NONE; - SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs; - // Check that post-RA scheduling is enabled for this target. - // This may upgrade the AntiDepMode. - if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(), - AntiDepMode, CriticalPathRCs)) + if (!enablePostRAScheduler(Subtarget, PassConfig->getOptLevel())) return false; - // Check for antidep breaking override... 
+ TII = Subtarget.getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); + AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = + Subtarget.getAntiDepBreakMode(); if (EnableAntiDepBreaking.getPosition() > 0) { AntiDepMode = (EnableAntiDepBreaking == "all") ? TargetSubtargetInfo::ANTIDEP_ALL @@ -303,6 +285,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { ? TargetSubtargetInfo::ANTIDEP_CRITICAL : TargetSubtargetInfo::ANTIDEP_NONE); } + SmallVector<const TargetRegisterClass *, 4> CriticalPathRCs; + Subtarget.getCriticalPathRCs(CriticalPathRCs); + RegClassInfo.runOnMachineFunction(Fn); LLVM_DEBUG(dbgs() << "PostRAScheduler\n"); diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 79b0fa6..3ab6315 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -30,22 +30,22 @@ static bool isValidRegUse(const MachineOperand &MO) { return isValidReg(MO) && MO.isUse(); } -static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg, +static bool isValidRegUseOf(const MachineOperand &MO, MCRegister Reg, const TargetRegisterInfo *TRI) { if (!isValidRegUse(MO)) return false; - return TRI->regsOverlap(MO.getReg(), PhysReg); + return TRI->regsOverlap(MO.getReg(), Reg); } static bool isValidRegDef(const MachineOperand &MO) { return isValidReg(MO) && MO.isDef(); } -static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg, +static bool isValidRegDefOf(const MachineOperand &MO, MCRegister Reg, const TargetRegisterInfo *TRI) { if (!isValidRegDef(MO)) return false; - return TRI->regsOverlap(MO.getReg(), PhysReg); + return TRI->regsOverlap(MO.getReg(), Reg); } void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) { @@ -261,7 +261,7 @@ void ReachingDefAnalysis::traverse() { } int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, - MCRegister PhysReg) const { + MCRegister Reg) const { assert(InstIds.count(MI) && "Unexpected machine instuction."); int InstId = InstIds.lookup(MI); int DefRes = ReachingDefDefaultVal; @@ -269,7 +269,7 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, assert(MBBNumber < MBBReachingDefs.numBlockIDs() && "Unexpected basic block number."); int LatestDef = ReachingDefDefaultVal; - for (MCRegUnit Unit : TRI->regunits(PhysReg)) { + for (MCRegUnit Unit : TRI->regunits(Reg)) { for (int Def : MBBReachingDefs.defs(MBBNumber, Unit)) { if (Def >= InstId) break; @@ -280,22 +280,21 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, return LatestDef; } -MachineInstr * -ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI, - MCRegister PhysReg) const { - return hasLocalDefBefore(MI, PhysReg) - ? getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg)) - : nullptr; +MachineInstr *ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI, + MCRegister Reg) const { + return hasLocalDefBefore(MI, Reg) + ? 
getInstFromId(MI->getParent(), getReachingDef(MI, Reg)) + : nullptr; } bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, - MCRegister PhysReg) const { + MCRegister Reg) const { MachineBasicBlock *ParentA = A->getParent(); MachineBasicBlock *ParentB = B->getParent(); if (ParentA != ParentB) return false; - return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg); + return getReachingDef(A, Reg) == getReachingDef(B, Reg); } MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, @@ -318,19 +317,18 @@ MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, return nullptr; } -int ReachingDefAnalysis::getClearance(MachineInstr *MI, - MCRegister PhysReg) const { +int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCRegister Reg) const { assert(InstIds.count(MI) && "Unexpected machine instuction."); - return InstIds.lookup(MI) - getReachingDef(MI, PhysReg); + return InstIds.lookup(MI) - getReachingDef(MI, Reg); } bool ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI, - MCRegister PhysReg) const { - return getReachingDef(MI, PhysReg) >= 0; + MCRegister Reg) const { + return getReachingDef(MI, Reg) >= 0; } void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, - MCRegister PhysReg, + MCRegister Reg, InstSet &Uses) const { MachineBasicBlock *MBB = Def->getParent(); MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); @@ -340,11 +338,11 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, // If/when we find a new reaching def, we know that there's no more uses // of 'Def'. - if (getReachingLocalMIDef(&*MI, PhysReg) != Def) + if (getReachingLocalMIDef(&*MI, Reg) != Def) return; for (auto &MO : MI->operands()) { - if (!isValidRegUseOf(MO, PhysReg, TRI)) + if (!isValidRegUseOf(MO, Reg, TRI)) continue; Uses.insert(&*MI); @@ -354,15 +352,14 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, } } -bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, - MCRegister PhysReg, +bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, MCRegister Reg, InstSet &Uses) const { for (MachineInstr &MI : instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) { for (auto &MO : MI.operands()) { - if (!isValidRegUseOf(MO, PhysReg, TRI)) + if (!isValidRegUseOf(MO, Reg, TRI)) continue; - if (getReachingDef(&MI, PhysReg) >= 0) + if (getReachingDef(&MI, Reg) >= 0) return false; Uses.insert(&MI); } @@ -370,18 +367,18 @@ bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, auto Last = MBB->getLastNonDebugInstr(); if (Last == MBB->end()) return true; - return isReachingDefLiveOut(&*Last, PhysReg); + return isReachingDefLiveOut(&*Last, Reg); } -void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg, +void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister Reg, InstSet &Uses) const { MachineBasicBlock *MBB = MI->getParent(); // Collect the uses that each def touches within the block. - getReachingLocalUses(MI, PhysReg, Uses); + getReachingLocalUses(MI, Reg, Uses); // Handle live-out values. 
- if (auto *LiveOut = getLocalLiveOutMIDef(MI->getParent(), PhysReg)) { + if (auto *LiveOut = getLocalLiveOutMIDef(MI->getParent(), Reg)) { if (LiveOut != MI) return; @@ -389,9 +386,9 @@ void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg, SmallPtrSet<MachineBasicBlock*, 4>Visited; while (!ToVisit.empty()) { MachineBasicBlock *MBB = ToVisit.pop_back_val(); - if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg)) + if (Visited.count(MBB) || !MBB->isLiveIn(Reg)) continue; - if (getLiveInUses(MBB, PhysReg, Uses)) + if (getLiveInUses(MBB, Reg, Uses)) llvm::append_range(ToVisit, MBB->successors()); Visited.insert(MBB); } @@ -399,25 +396,25 @@ void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg, } void ReachingDefAnalysis::getGlobalReachingDefs(MachineInstr *MI, - MCRegister PhysReg, + MCRegister Reg, InstSet &Defs) const { - if (auto *Def = getUniqueReachingMIDef(MI, PhysReg)) { + if (auto *Def = getUniqueReachingMIDef(MI, Reg)) { Defs.insert(Def); return; } for (auto *MBB : MI->getParent()->predecessors()) - getLiveOuts(MBB, PhysReg, Defs); + getLiveOuts(MBB, Reg, Defs); } -void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, - MCRegister PhysReg, InstSet &Defs) const { +void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, MCRegister Reg, + InstSet &Defs) const { SmallPtrSet<MachineBasicBlock*, 2> VisitedBBs; - getLiveOuts(MBB, PhysReg, Defs, VisitedBBs); + getLiveOuts(MBB, Reg, Defs, VisitedBBs); } -void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, - MCRegister PhysReg, InstSet &Defs, +void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, MCRegister Reg, + InstSet &Defs, BlockSet &VisitedBBs) const { if (VisitedBBs.count(MBB)) return; @@ -425,28 +422,28 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, VisitedBBs.insert(MBB); LiveRegUnits LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(PhysReg)) + if (LiveRegs.available(Reg)) return; - if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) + if (auto *Def = getLocalLiveOutMIDef(MBB, Reg)) Defs.insert(Def); else for (auto *Pred : MBB->predecessors()) - getLiveOuts(Pred, PhysReg, Defs, VisitedBBs); + getLiveOuts(Pred, Reg, Defs, VisitedBBs); } MachineInstr * ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI, - MCRegister PhysReg) const { + MCRegister Reg) const { // If there's a local def before MI, return it. - MachineInstr *LocalDef = getReachingLocalMIDef(MI, PhysReg); + MachineInstr *LocalDef = getReachingLocalMIDef(MI, Reg); if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI)) return LocalDef; SmallPtrSet<MachineInstr*, 2> Incoming; MachineBasicBlock *Parent = MI->getParent(); for (auto *Pred : Parent->predecessors()) - getLiveOuts(Pred, PhysReg, Incoming); + getLiveOuts(Pred, Reg, Incoming); // Check that we have a single incoming value and that it does not // come from the same block as MI - since it would mean that the def @@ -469,13 +466,13 @@ MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI, } bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, - MCRegister PhysReg) const { + MCRegister Reg) const { MachineBasicBlock *MBB = MI->getParent(); LiveRegUnits LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); // Yes if the register is live out of the basic block. 
- if (!LiveRegs.available(PhysReg)) + if (!LiveRegs.available(Reg)) return true; // Walk backwards through the block to see if the register is live at some @@ -483,62 +480,61 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, for (MachineInstr &Last : instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) { LiveRegs.stepBackward(Last); - if (!LiveRegs.available(PhysReg)) + if (!LiveRegs.available(Reg)) return InstIds.lookup(&Last) > InstIds.lookup(MI); } return false; } bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI, - MCRegister PhysReg) const { + MCRegister Reg) const { MachineBasicBlock *MBB = MI->getParent(); auto Last = MBB->getLastNonDebugInstr(); if (Last != MBB->end() && - getReachingDef(MI, PhysReg) != getReachingDef(&*Last, PhysReg)) + getReachingDef(MI, Reg) != getReachingDef(&*Last, Reg)) return true; - if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) - return Def == getReachingLocalMIDef(MI, PhysReg); + if (auto *Def = getLocalLiveOutMIDef(MBB, Reg)) + return Def == getReachingLocalMIDef(MI, Reg); return false; } bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, - MCRegister PhysReg) const { + MCRegister Reg) const { MachineBasicBlock *MBB = MI->getParent(); LiveRegUnits LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(PhysReg)) + if (LiveRegs.available(Reg)) return false; auto Last = MBB->getLastNonDebugInstr(); - int Def = getReachingDef(MI, PhysReg); - if (Last != MBB->end() && getReachingDef(&*Last, PhysReg) != Def) + int Def = getReachingDef(MI, Reg); + if (Last != MBB->end() && getReachingDef(&*Last, Reg) != Def) return false; // Finally check that the last instruction doesn't redefine the register. for (auto &MO : Last->operands()) - if (isValidRegDefOf(MO, PhysReg, TRI)) + if (isValidRegDefOf(MO, Reg, TRI)) return false; return true; } -MachineInstr * -ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, - MCRegister PhysReg) const { +MachineInstr *ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, + MCRegister Reg) const { LiveRegUnits LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(PhysReg)) + if (LiveRegs.available(Reg)) return nullptr; auto Last = MBB->getLastNonDebugInstr(); if (Last == MBB->end()) return nullptr; - int Def = getReachingDef(&*Last, PhysReg); + int Def = getReachingDef(&*Last, Reg); for (auto &MO : Last->operands()) - if (isValidRegDefOf(MO, PhysReg, TRI)) + if (isValidRegDefOf(MO, Reg, TRI)) return &*Last; return Def < 0 ? 
nullptr : getInstFromId(MBB, Def); @@ -650,7 +646,7 @@ ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited, void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI, InstSet &Dead) const { Dead.insert(MI); - auto IsDead = [this, &Dead](MachineInstr *Def, MCRegister PhysReg) { + auto IsDead = [this, &Dead](MachineInstr *Def, MCRegister Reg) { if (mayHaveSideEffects(*Def)) return false; @@ -666,7 +662,7 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI, return false; SmallPtrSet<MachineInstr*, 4> Uses; - getGlobalUses(Def, PhysReg, Uses); + getGlobalUses(Def, Reg, Uses); return llvm::set_is_subset(Uses, Dead); }; @@ -680,18 +676,18 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI, } bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, - MCRegister PhysReg) const { + MCRegister Reg) const { SmallPtrSet<MachineInstr*, 1> Ignore; - return isSafeToDefRegAt(MI, PhysReg, Ignore); + return isSafeToDefRegAt(MI, Reg, Ignore); } -bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg, +bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister Reg, InstSet &Ignore) const { // Check for any uses of the register after MI. - if (isRegUsedAfter(MI, PhysReg)) { - if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) { + if (isRegUsedAfter(MI, Reg)) { + if (auto *Def = getReachingLocalMIDef(MI, Reg)) { SmallPtrSet<MachineInstr*, 2> Uses; - getGlobalUses(Def, PhysReg, Uses); + getGlobalUses(Def, Reg, Uses); if (!llvm::set_is_subset(Uses, Ignore)) return false; } else @@ -700,13 +696,13 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg, MachineBasicBlock *MBB = MI->getParent(); // Check for any defs after MI. - if (isRegDefinedAfter(MI, PhysReg)) { + if (isRegDefinedAfter(MI, Reg)) { auto I = MachineBasicBlock::iterator(MI); for (auto E = MBB->end(); I != E; ++I) { if (Ignore.count(&*I)) continue; for (auto &MO : I->operands()) - if (isValidRegDefOf(MO, PhysReg, TRI)) + if (isValidRegDefOf(MO, Reg, TRI)) return false; } } diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 4fa2bc7..b94992c 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -140,7 +140,7 @@ static cl::opt<bool> GreedyReverseLocalAssignment( static cl::opt<unsigned> SplitThresholdForRegWithHint( "split-threshold-for-reg-with-hint", cl::desc("The threshold for splitting a virtual register with a hint, in " - "percentate"), + "percentage"), cl::init(75), cl::Hidden); static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", @@ -376,6 +376,12 @@ unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const { return Prio; } +unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const { + // Prioritize by virtual register number, lowest first. + Register Reg = LI.reg(); + return ~Reg.virtRegIndex(); +} + const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { @@ -2029,6 +2035,9 @@ unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg, // available colors. Matrix->assign(VirtReg, PhysReg); + // VirtReg may be deleted during tryRecoloringCandidates, save a copy. + Register ThisVirtReg = VirtReg.reg(); + // Save the current recoloring state. // If we cannot recolor all the interferences, we will have to start again // at this point for the next physical register. 
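The bitwise NOT in DummyPriorityAdvisor::getPriority above is what makes "lowest virtual register first" come out of a queue that pops the numerically largest priority first, as the greedy allocator's queue does. A one-line check of that ordering assumption:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Lower vreg index -> larger inverted value -> dequeued earlier.
      assert(~uint32_t(0) > ~uint32_t(1));
      assert(~uint32_t(1) > ~uint32_t(2));
      return 0;
    }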
@@ -2040,8 +2049,16 @@ unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg, NewVRegs.push_back(NewVReg); // Do not mess up with the global assignment process. // I.e., VirtReg must be unassigned. - Matrix->unassign(VirtReg); - return PhysReg; + if (VRM->hasPhys(ThisVirtReg)) { + Matrix->unassign(VirtReg); + return PhysReg; + } + + // It is possible VirtReg will be deleted during tryRecoloringCandidates. + LLVM_DEBUG(dbgs() << "tryRecoloringCandidates deleted a fixed register " + << printReg(ThisVirtReg) << '\n'); + FixedRegisters.erase(ThisVirtReg); + return 0; } LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp index 0650aaf..4525b8f 100644 --- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp +++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp @@ -30,7 +30,10 @@ static cl::opt<RegAllocPriorityAdvisorAnalysis::AdvisorMode> Mode( clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development, - "development", "for training"))); + "development", "for training"), + clEnumValN( + RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy, "dummy", + "prioritize low virtual register numbers for test and debug"))); char RegAllocPriorityAdvisorAnalysis::ID = 0; INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority", @@ -67,6 +70,31 @@ private: } const bool NotAsRequested; }; + +class DummyPriorityAdvisorAnalysis final + : public RegAllocPriorityAdvisorAnalysis { +public: + DummyPriorityAdvisorAnalysis() + : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Dummy) {} + + // support for isa<> and dyn_cast. + static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + return R->getAdvisorMode() == AdvisorMode::Dummy; + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<SlotIndexesWrapperPass>(); + RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); + } + + std::unique_ptr<RegAllocPriorityAdvisor> + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + return std::make_unique<DummyPriorityAdvisor>( + MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI()); + } +}; + } // namespace template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() { @@ -75,6 +103,9 @@ template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() { case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default: Ret = new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ false); break; + case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy: + Ret = new DummyPriorityAdvisorAnalysis(); + break; case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development: #if defined(LLVM_HAVE_TFLITE) Ret = createDevelopmentModePriorityAdvisor(); @@ -97,6 +128,8 @@ StringRef RegAllocPriorityAdvisorAnalysis::getPassName() const { return "Release mode Regalloc Priority Advisor"; case AdvisorMode::Development: return "Development mode Regalloc Priority Advisor"; + case AdvisorMode::Dummy: + return "Dummy Regalloc Priority Advisor"; } llvm_unreachable("Unknown advisor kind"); } diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h index 1e9fa96..32e4598 100644 --- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h +++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h @@ -56,9 +56,21 @@ private: unsigned getPriority(const LiveInterval &LI) const override; }; +/// Stupid priority advisor which 
just enqueues in virtual register number +/// order, for debug purposes only. +class DummyPriorityAdvisor : public RegAllocPriorityAdvisor { +public: + DummyPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *const Indexes) + : RegAllocPriorityAdvisor(MF, RA, Indexes) {} + +private: + unsigned getPriority(const LiveInterval &LI) const override; +}; + class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { public: - enum class AdvisorMode : int { Default, Release, Development }; + enum class AdvisorMode : int { Default, Release, Development, Dummy }; RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode) : ImmutablePass(ID), Mode(Mode){}; diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 20ad644..8313927 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -113,7 +113,7 @@ static cl::opt<unsigned> LargeIntervalSizeThreshold( static cl::opt<unsigned> LargeIntervalFreqThreshold( "large-interval-freq-threshold", cl::Hidden, - cl::desc("For a large interval, if it is coalesed with other live " + cl::desc("For a large interval, if it is coalesced with other live " "intervals many times more than the threshold, stop its " "coalescing to control the compile time. "), cl::init(256)); @@ -1325,11 +1325,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; - // Only support subregister destinations when the def is read-undef. - MachineOperand &DstOperand = CopyMI->getOperand(0); - Register CopyDstReg = DstOperand.getReg(); - if (DstOperand.getSubReg() && !DstOperand.isUndef()) - return false; // If both SrcIdx and DstIdx are set, correct rematerialization would widen // the register substantially (beyond both source and dest size). This is bad @@ -1339,6 +1334,32 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (SrcIdx && DstIdx) return false; + // Only support subregister destinations when the def is read-undef. + MachineOperand &DstOperand = CopyMI->getOperand(0); + Register CopyDstReg = DstOperand.getReg(); + if (DstOperand.getSubReg() && !DstOperand.isUndef()) + return false; + + // In the physical register case, checking that the def is read-undef is not + // enough. We're widening the def and need to avoid clobbering other live + // values in the unused register pieces. + // + // TODO: Targets may support rewriting the rematerialized instruction to only + // touch relevant lanes, in which case we don't need any liveness check. + if (CopyDstReg.isPhysical() && CP.isPartial()) { + for (MCRegUnit Unit : TRI->regunits(DstReg)) { + // Ignore the register units we are writing anyway. + if (is_contained(TRI->regunits(CopyDstReg), Unit)) + continue; + + // Check if the other lanes we are defining are live at the + // rematerialization point. + LiveRange &LR = LIS->getRegUnit(Unit); + if (LR.liveAt(CopyIdx)) + return false; + } + } + const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg(); const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { @@ -1375,27 +1396,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, NewMI.setDebugLoc(DL); // In a situation like the following: - // - // undef %2.subreg:reg = INST %1:reg ; DefMI (rematerializable), - // ; DefSubIdx = subreg - // %3:reg = COPY %2 ; SrcIdx = DstIdx = 0 - // .... 
= SOMEINSTR %3:reg - // - // there are no subranges for %3 so after rematerialization we need - // to explicitly create them. Undefined subranges are removed later on. - if (DstReg.isVirtual() && DefSubIdx && !CP.getSrcIdx() && !CP.getDstIdx() && - MRI->shouldTrackSubRegLiveness(DstReg)) { - LiveInterval &DstInt = LIS->getInterval(DstReg); - if (!DstInt.hasSubRanges()) { - LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstReg); - LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(DefSubIdx); - LaneBitmask UnusedLanes = FullMask & ~UsedLanes; - DstInt.createSubRangeFrom(LIS->getVNInfoAllocator(), UsedLanes, DstInt); - DstInt.createSubRangeFrom(LIS->getVNInfoAllocator(), UnusedLanes, DstInt); - } - } - - // In a situation like the following: // %0:subreg = instr ; DefMI, subreg = DstIdx // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0 // instead of widening %1 to the register class of %0 simply do: @@ -1523,6 +1523,27 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // sure that "undef" is not set. if (NewIdx == 0) NewMI.getOperand(0).setIsUndef(false); + + // In a situation like the following: + // + // undef %2.subreg:reg = INST %1:reg ; DefMI (rematerializable), + // ; Defines only some of lanes, + // ; so DefSubIdx = NewIdx = subreg + // %3:reg = COPY %2 ; Copy full reg + // .... = SOMEINSTR %3:reg ; Use full reg + // + // there are no subranges for %3 so after rematerialization we need + // to explicitly create them. Undefined subranges are removed later on. + if (NewIdx && !DstInt.hasSubRanges() && + MRI->shouldTrackSubRegLiveness(DstReg)) { + LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstReg); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(NewIdx); + LaneBitmask UnusedLanes = FullMask & ~UsedLanes; + VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator(); + DstInt.createSubRangeFrom(Alloc, UsedLanes, DstInt); + DstInt.createSubRangeFrom(Alloc, UnusedLanes, DstInt); + } + // Add dead subregister definitions if we are defining the whole register // but only part of it is live. // This could happen if the rematerialization instruction is rematerializing diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 7b927e6..bfc49dd 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -1044,6 +1044,18 @@ bool SelectOptimizeImpl::isConvertToBranchProfitableBase( return true; } + // If latch has a select group with several elements, it is usually profitable + // to convert it to branches. We let `optimizeSelectsInnerLoops` decide if + // conversion is profitable for innermost loops. 
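The code just below requires a group of at least three selects sitting in the latch block of a non-innermost loop before it forces conversion. As a hypothetical source-level shape this targets (editorial sketch; names and shape are illustrative, not from this patch):

#include <cstdlib>

int clampAndSum(const int *V, int N, int Lo, int Hi) {
  int Sum = 0;
  for (int I = 0; I < N; ++I) {
    int Acc = 0;
    for (int J = 0; J <= I; ++J)  // innermost loop, left to the other heuristic
      Acc += V[J];
    // The outer loop's latch may carry a group of three selects:
    int A = Acc < Lo ? Lo : Acc;
    int B = A > Hi ? Hi : A;
    Sum += (B & 1) ? B : -B;
  }
  return Sum;
}

Whether the compiler actually forms a select group here depends on earlier passes; the point is only the shape: several selects feeding the back edge of an outer loop.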
+ auto *BB = SI.getI()->getParent(); + auto *L = LI->getLoopFor(BB); + if (L && !L->isInnermost() && L->getLoopLatch() == BB && + ASI.Selects.size() >= 3) { + OR << "Converted to branch because select group in the latch block is big."; + EmitAndPrintRemark(ORE, OR); + return true; + } + ORmiss << "Not profitable to convert to branch (base heuristic)."; EmitAndPrintRemark(ORE, ORmiss); return false; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6cbfef2..da3c834 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -141,7 +141,7 @@ static cl::opt<bool> EnableReduceLoadOpStoreWidth( static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable( "combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), - cl::desc("DAG combiner force override the narrowing profitable check when" + cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences")); static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( @@ -3949,6 +3949,23 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) return Result; + // Similar to the previous rule, but this time targeting an expanded abs. + // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X)) + // as well as + // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X)) + // Note that these two are applicable to both signed and unsigned min/max. + SDValue X; + SDValue S0; + auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0)); + if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat), + m_UMax(m_Value(X), NegPat), + m_SMin(m_Value(X), NegPat), + m_UMin(m_Value(X), NegPat))))) { + unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode()); + if (hasOperation(NewOpc, VT)) + return DAG.getNode(NewOpc, DL, VT, X, S0); + } + // Fold neg(splat(neg(x)) -> splat(x) if (VT.isVector()) { SDValue N1S = DAG.getSplatValue(N1, true); @@ -20438,10 +20455,8 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { Value.hasOneUse()) { LoadSDNode *LD = cast<LoadSDNode>(Value); EVT VT = LD->getMemoryVT(); - if (!VT.isFloatingPoint() || - VT != ST->getMemoryVT() || - LD->isNonTemporal() || - ST->isNonTemporal() || + if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() || + LD->isNonTemporal() || ST->isNonTemporal() || LD->getPointerInfo().getAddrSpace() != 0 || ST->getPointerInfo().getAddrSpace() != 0) return SDValue(); @@ -23088,8 +23103,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger()) return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT); + // TODO: Add support for SCALAR_TO_VECTOR implicit truncation. if (LegalTypes && BCSrc.getValueType().isInteger() && - BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) { + BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR && + BCSrc.getScalarValueSizeInBits() == + BCSrc.getOperand(0).getScalarValueSizeInBits()) { // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt --> // trunc i64 X to i32 SDValue X = BCSrc.getOperand(0); @@ -24288,8 +24306,8 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); // Keep track of what we encounter. 
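A note on the visitSUB fold added earlier in this file (the concat-vector hunk continues below): it rewrites (sub 0, (max X, (sub 0, X))) to (min X, (sub 0, X)), and min to max, via ISD::getInverseMinMaxOpcode. This is sound for the signed and unsigned orderings alike, because in two's-complement arithmetic negating either of the two operands {X, 0 - X} yields the other, so the negation of the larger is exactly the smaller. A minimal C++ property check (editorial sketch, not patch code); it holds for every X, including the INT_MIN-style wraparound case, and the same check passes with signed types:

#include <algorithm>
#include <cstdint>

bool negMinMaxIdentityHolds(uint32_t X) {
  uint32_t Neg = 0u - X;  // (sub 0, X) with wraparound
  return 0u - std::max(X, Neg) == std::min(X, Neg) &&
         0u - std::min(X, Neg) == std::max(X, Neg);
}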
- bool AnyInteger = false; - bool AnyFP = false; + EVT AnyFPVT; + for (const SDValue &Op : N->ops()) { if (ISD::BITCAST == Op.getOpcode() && !Op.getOperand(0).getValueType().isVector()) @@ -24303,27 +24321,23 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { // If it's neither, bail out, it could be something weird like x86mmx. EVT LastOpVT = Ops.back().getValueType(); if (LastOpVT.isFloatingPoint()) - AnyFP = true; - else if (LastOpVT.isInteger()) - AnyInteger = true; - else + AnyFPVT = LastOpVT; + else if (!LastOpVT.isInteger()) return SDValue(); } // If any of the operands is a floating point scalar bitcast to a vector, // use floating point types throughout, and bitcast everything. // Replace UNDEFs by another scalar UNDEF node, of the final desired type. - if (AnyFP) { - SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); - if (AnyInteger) { - for (SDValue &Op : Ops) { - if (Op.getValueType() == SVT) - continue; - if (Op.isUndef()) - Op = DAG.getNode(ISD::UNDEF, DL, SVT); - else - Op = DAG.getBitcast(SVT, Op); - } + if (AnyFPVT != EVT()) { + SVT = AnyFPVT; + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.isUndef()) + Op = DAG.getNode(ISD::UNDEF, DL, SVT); + else + Op = DAG.getBitcast(SVT, Op); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index db21e70..89a00c5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -402,6 +402,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FMAXNUM_IEEE: case ISD::FMINIMUM: case ISD::FMAXIMUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -1081,6 +1083,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::FMAXIMUM: Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)); return; + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: + Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG)); + return; case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -1738,7 +1744,8 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, bool IsStrict = Node->isStrictFPOpcode(); unsigned OpNo = IsStrict ? 1 : 0; SDValue Src = Node->getOperand(OpNo); - EVT VT = Src.getValueType(); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); SDLoc DL(Node); // Attempt to expand using TargetLowering. @@ -1752,11 +1759,11 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, } // Make sure that the SINT_TO_FP and SRL instructions are available. 
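(The availability check resumes just below.) On the FMINIMUMNUM/FMAXIMUMNUM cases routed to expandFMINIMUMNUM_FMAXIMUMNUM earlier in this file: these nodes carry IEEE 754-2019 minimumNumber/maximumNumber semantics, where a NaN loses to a number and signed zeros are ordered. Roughly, for the minimum side (editorial scalar sketch, ignoring signaling-NaN quieting details):

#include <cmath>

double minimumNumSketch(double A, double B) {
  if (std::isnan(A)) return B;                 // NaN loses; both NaN gives NaN
  if (std::isnan(B)) return A;
  if (A == B) return std::signbit(A) ? A : B;  // order -0.0 below +0.0
  return A < B ? A : B;
}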
- if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) == + if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Expand) || - (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) == + (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) == TargetLowering::Expand)) || - TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) { + TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) { if (IsStrict) { UnrollStrictFPOp(Node, Results); return; @@ -1766,37 +1773,59 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, return; } - unsigned BW = VT.getScalarSizeInBits(); + unsigned BW = SrcVT.getScalarSizeInBits(); assert((BW == 64 || BW == 32) && "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); - SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); + // If STRICT_/FMUL is not supported by the target (in case of f16) replace the + // UINT_TO_FP with a larger float and round to the smaller type + if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) || + (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) { + EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64; + SDValue UIToFP; + SDValue Result; + SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true); + EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT); + if (IsStrict) { + UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other}, + {Node->getOperand(0), Src}); + Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other}, + {Node->getOperand(0), UIToFP, TargetZero}); + Results.push_back(Result); + Results.push_back(Result.getValue(1)); + } else { + UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src); + Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero); + Results.push_back(Result); + } + + return; + } + + SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT); // Constants to clear the upper part of the word. // Notice that we can also use SHL+SHR, but using a constant is slightly // faster on x86. uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; - SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); + SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT); // Two to the power of half-word-size. - SDValue TWOHW = - DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0)); + SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT); // Clear upper part of LO, lower HI - SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord); - SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask); + SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord); + SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask); if (IsStrict) { // Convert hi and lo to floats // Convert the hi part back to the upper values // TODO: Can any fast-math-flags be set on these nodes? 
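The half-word expansion continuing below turns one unsigned conversion into two signed ones: both halves are strictly below 2^(BW/2), so SINT_TO_FP only ever sees non-negative values, and the high half is scaled back up by 2^(BW/2) before the halves are re-added. In scalar form for BW == 64 (editorial sketch, not patch code):

#include <cstdint>

double u64ToF64ViaHalves(uint64_t X) {
  uint64_t Hi = X >> 32;             // SRL by HalfWord (BW / 2)
  uint64_t Lo = X & 0xFFFFFFFFu;     // AND with HWMask
  double FHi = double(int64_t(Hi));  // SINT_TO_FP, exact: Hi < 2^32
  double FLo = double(int64_t(Lo));  // SINT_TO_FP, exact: Lo < 2^32
  return FHi * 4294967296.0 + FLo;   // FMUL by TWOHW = 2^32, then FADD
}

Only the final FADD can round, which is why the expansion is correct in all rounding modes.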
- SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, - {Node->getValueType(0), MVT::Other}, + SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other}, {Node->getOperand(0), HI}); - fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other}, + fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other}, {fHI.getValue(1), fHI, TWOHW}); - SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, - {Node->getValueType(0), MVT::Other}, + SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other}, {Node->getOperand(0), LO}); SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1), @@ -1804,8 +1833,7 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, // Add the two halves SDValue Result = - DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other}, - {TF, fHI, fLO}); + DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO}); Results.push_back(Result); Results.push_back(Result.getValue(1)); @@ -1815,13 +1843,12 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, // Convert hi and lo to floats // Convert the hi part back to the upper values // TODO: Can any fast-math-flags be set on these nodes? - SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI); - fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW); - SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO); + SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI); + fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW); + SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO); // Add the two halves - Results.push_back( - DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO)); + Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO)); } SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { @@ -2246,11 +2273,13 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { DAG.getVectorIdxConstant(i, dl)); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, DAG.getVectorIdxConstant(i, dl)); + // FIXME: We should use i1 setcc + boolext here, but it causes regressions. 
Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); - Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT), + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], + DAG.getBoolConstant(true, dl, EltVT, VT), DAG.getConstant(0, dl, EltVT)); } return DAG.getBuildVector(VT, dl, Ops); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107454a..780eba1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -149,6 +149,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMAXNUM_IEEE: case ISD::FMINIMUM: case ISD::FMAXIMUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: case ISD::FLDEXP: case ISD::ABDS: case ISD::ABDU: diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 9e5867c..51ee3cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -125,9 +125,9 @@ static cl::opt<int> MaxReorderWindow( cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp")); -static cl::opt<unsigned> AvgIPC( - "sched-avg-ipc", cl::Hidden, cl::init(1), - cl::desc("Average inst/cycle whan no target itinerary exists.")); +static cl::opt<unsigned> + AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), + cl::desc("Average inst/cycle when no target itinerary exists.")); namespace { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 26fc75c..dff7243 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -43,9 +43,9 @@ STATISTIC(LoadsClustered, "Number of loads clustered together"); // without a target itinerary. The choice of number here has more to do with // balancing scheduler heuristics than with the actual machine latency. 
static cl::opt<int> HighLatencyCycles( - "sched-high-latency-cycles", cl::Hidden, cl::init(10), - cl::desc("Roughly estimate the number of cycles that 'long latency'" - "instructions take for targets with no itinerary")); + "sched-high-latency-cycles", cl::Hidden, cl::init(10), + cl::desc("Roughly estimate the number of cycles that 'long latency' " + "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 10e8ba9..0dfd030 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -430,6 +430,21 @@ bool ISD::matchBinaryPredicate( return true; } +ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) { + switch (MinMaxOpc) { + default: + llvm_unreachable("unrecognized opcode"); + case ISD::UMIN: + return ISD::UMAX; + case ISD::UMAX: + return ISD::UMIN; + case ISD::SMIN: + return ISD::SMAX; + case ISD::SMAX: + return ISD::SMIN; + } +} + ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) { switch (VecReduceOpcode) { default: diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e87d809..9f57884 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8435,7 +8435,6 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, return false; SDLoc dl(SDValue(Node, 0)); - EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout()); // Implementation of unsigned i64 to f64 following the algorithm in // __floatundidf in compiler_rt. This implementation performs rounding @@ -8448,7 +8447,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT); SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); - SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); + SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl); SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 687acd9..8437422 100644 --- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -106,8 +106,6 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { if (!EnablePatchPointLiveness) return false; - LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " - << MF.getName() << " **********\n"); TRI = MF.getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; @@ -121,6 +119,8 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { /// Performs the actual liveness calculation for the function. bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " + << MF.getName() << " **********\n"); bool HasChanged = false; // For all basic blocks in the function. 
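(The per-block liveness walk resumes below.) Back in the TargetLowering.cpp hunk above, expandUINT_TO_FP follows compiler-rt's __floatundidf: each 32-bit half is planted directly into the mantissa of a fixed power of two, and a single subtraction of the combined bias leaves one correctly rounded add. A scalar C++20 rendering of the constants shown in that hunk (editorial sketch, not patch code):

#include <bit>
#include <cstdint>

double u64ToF64BitTrick(uint64_t X) {
  // Bit pattern 0x433... is 2^52; OR-ing the low half into its mantissa
  // yields exactly 2^52 + Lo.
  double FLo = std::bit_cast<double>(UINT64_C(0x4330000000000000) | (X & 0xFFFFFFFFu));
  // Bit pattern 0x453... is 2^84; the high half lands at weight 2^32,
  // yielding exactly 2^84 + Hi * 2^32.
  double FHi = std::bit_cast<double>(UINT64_C(0x4530000000000000) | (X >> 32));
  // Subtracting (2^84 + 2^52) is exact; only the final add rounds.
  return (FHi - std::bit_cast<double>(UINT64_C(0x4530000000100000))) + FLo;
}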
for (auto &MBB : MF) { diff --git a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp index 74a94d6..decffdc 100644 --- a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -259,7 +259,7 @@ void SwiftErrorValueTracking::propagateVRegs() { for (const auto &Use : VRegUpwardsUse) { const MachineBasicBlock *UseBB = Use.first.first; Register VReg = Use.second; - if (!MRI.def_begin(VReg).atEnd()) + if (!MRI.def_empty(VReg)) continue; #ifdef EXPENSIVE_CHECKS diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index d407e9f..5c05589 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -113,8 +113,6 @@ static cl::opt<bool> EnableImplicitNullChecks( static cl::opt<bool> DisableMergeICmps("disable-mergeicmps", cl::desc("Disable MergeICmps Pass"), cl::init(false), cl::Hidden); -static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, - cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, cl::desc("Print LLVM IR input to isel pass")); @@ -503,7 +501,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() { SET_BOOLEAN_OPTION(DisableCGP) SET_BOOLEAN_OPTION(DisablePartialLibcallInlining) SET_BOOLEAN_OPTION(DisableSelectOptimize) - SET_BOOLEAN_OPTION(PrintLSR) SET_BOOLEAN_OPTION(PrintISelInput) SET_BOOLEAN_OPTION(DebugifyAndStripAll) SET_BOOLEAN_OPTION(DebugifyCheckAndStripAll) @@ -836,9 +833,6 @@ void TargetPassConfig::addIRPasses() { addPass(createLoopStrengthReducePass()); if (EnableLoopTermFold) addPass(createLoopTermFoldPass()); - if (PrintLSR) - addPass(createPrintFunctionPass(dbgs(), - "\n\n*** Code after LSR ***\n")); } // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
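One loose end from the RegAllocPriorityAdvisor changes earlier: the diff registers the dummy advisor but its getPriority body falls outside the shown hunks. A plausible sketch matching the stated intent, assuming RAGreedy dequeues the highest priority first (editorial guess, not this commit's code):

unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  // Invert the index so lower-numbered virtual registers dequeue first.
  return ~Register::virtReg2Index(LI.reg());
}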